[PATCH] update genext2fs.c to CVS rev 1.118

See http://genext2fs.cvs.sourceforge.net/viewvc/genext2fs/genext2fs/genext2fs.c?view=log
for details.

Numerous bugfixes, large file and filesystem support, rev 1 filesystems,
volume id support, block size, etc.

Signed-off-by: Peter Korsgaard <jacmet@sunsite.dk>
---
 cache.h     |  128 ++++
 genext2fs.c | 1870 ++++++++++++++++++++++++++++++++++++++++++------------------
 list.h      |   78 ++
 3 files changed, 1527 insertions(+), 549 deletions(-)

Index: genext2fs-1.4.1/genext2fs.c
===================================================================
--- genext2fs-1.4.1.orig/genext2fs.c
+++ genext2fs-1.4.1/genext2fs.c
@@ -53,6 +53,12 @@
 // 			along with -q, -P, -U
 
 
+/*
+ * Allow fseeko/off_t to be 64-bit offsets to allow filesystems and
+ * individual files >2GB.
+ */
+#define _FILE_OFFSET_BITS 64
+
 #include <config.h>
 #include <stdio.h>
 
@@ -107,10 +113,8 @@
 
 #if HAVE_DIRENT_H
 # include <dirent.h>
-# define NAMLEN(dirent) strlen((dirent)->d_name)
 #else
 # define dirent direct
-# define NAMLEN(dirent) (dirent)->d_namlen
 # if HAVE_SYS_NDIR_H
 #  include <sys/ndir.h>
 # endif
@@ -144,6 +148,8 @@
 # include <limits.h>
 #endif
 
+#include "cache.h"
+
 struct stats {
 	unsigned long nblocks;
 	unsigned long ninodes;
@@ -151,13 +157,42 @@
 
 // block size
 
-#define BLOCKSIZE         1024
+static int blocksize = 1024;
+
+#define SUPERBLOCK_OFFSET	1024
+#define SUPERBLOCK_SIZE		1024
+
+#define BLOCKSIZE         blocksize
 #define BLOCKS_PER_GROUP  8192
 #define INODES_PER_GROUP  8192
 /* Percentage of blocks that are reserved.*/
 #define RESERVED_BLOCKS       5/100
 #define MAX_RESERVED_BLOCKS  25/100
 
+/* The default value for s_creator_os. */
+#if defined(__linux__)    &&    defined(EXT2_OS_LINUX)
+#define CREATOR_OS EXT2_OS_LINUX
+#define CREATOR_OS_NAME "linux"
+#else
+#if defined(__GNU__)     &&     defined(EXT2_OS_HURD)
+#define CREATOR_OS EXT2_OS_HURD
+#define CREATOR_OS_NAME "hurd"
+#else
+#if defined(__FreeBSD__) &&     defined(EXT2_OS_FREEBSD)
+#define CREATOR_OS EXT2_OS_FREEBSD
+#define CREATOR_OS_NAME "freebsd"
+#else
+#if defined(LITES)         &&   defined(EXT2_OS_LITES)
+#define CREATOR_OS EXT2_OS_LITES
+#define CREATOR_OS_NAME "lites"
+#else
+#define CREATOR_OS EXT2_OS_LINUX /* by default */
+#define CREATOR_OS_NAME "linux"
+#endif /* defined(LITES) && defined(EXT2_OS_LITES) */
+#endif /* defined(__FreeBSD__) && defined(EXT2_OS_FREEBSD) */
+#endif /* defined(__GNU__)     && defined(EXT2_OS_HURD) */
+#endif /* defined(__linux__)   && defined(EXT2_OS_LINUX) */
+
 
 // inode block size (why is it != BLOCKSIZE ?!?)
 /* The field i_blocks in the ext2 inode stores the number of data blocks
@@ -190,6 +225,14 @@
 #define EXT2_TIND_BLOCK    14                    // triple indirect block
 #define EXT2_INIT_BLOCK    0xFFFFFFFF            // just initialized (not really a block address)
 
+// codes for operating systems
+
+#define EXT2_OS_LINUX           0
+#define EXT2_OS_HURD            1
+#define EXT2_OS_MASIX           2
+#define EXT2_OS_FREEBSD         3
+#define EXT2_OS_LITES           4
+
 // end of a block walk
 
 #define WALK_END           0xFFFFFFFE
@@ -227,44 +270,46 @@
 #define FM_IWOTH   0000002	// write
 #define FM_IXOTH   0000001	// execute
 
-// options
-
-#define OP_HOLES     0x01       // make files with holes
-
 /* Defines for accessing group details */
 
 // Number of groups in the filesystem
 #define GRP_NBGROUPS(fs) \
-	(((fs)->sb.s_blocks_count - fs->sb.s_first_data_block + \
-	  (fs)->sb.s_blocks_per_group - 1) / (fs)->sb.s_blocks_per_group)
+	(((fs)->sb->s_blocks_count - fs->sb->s_first_data_block + \
+	  (fs)->sb->s_blocks_per_group - 1) / (fs)->sb->s_blocks_per_group)
 
 // Get group block bitmap (bbm) given the group number
-#define GRP_GET_GROUP_BBM(fs,grp) ( get_blk((fs),(fs)->gd[(grp)].bg_block_bitmap) )
+#define GRP_GET_GROUP_BBM(fs,grp,bi) (get_blk((fs),(grp)->bg_block_bitmap,(bi)))
+#define GRP_PUT_GROUP_BBM(bi) ( put_blk((bi)) )
 
 // Get group inode bitmap (ibm) given the group number
-#define GRP_GET_GROUP_IBM(fs,grp) ( get_blk((fs),(fs)->gd[(grp)].bg_inode_bitmap) )
-		
+#define GRP_GET_GROUP_IBM(fs,grp,bi) (get_blk((fs), (grp)->bg_inode_bitmap,(bi)))
+#define GRP_PUT_GROUP_IBM(bi) ( put_blk((bi)) )
+
 // Given an inode number find the group it belongs to
-#define GRP_GROUP_OF_INODE(fs,nod) ( ((nod)-1) / (fs)->sb.s_inodes_per_group)
+#define GRP_GROUP_OF_INODE(fs,nod) ( ((nod)-1) / (fs)->sb->s_inodes_per_group)
 
 //Given an inode number get the inode bitmap that covers it
-#define GRP_GET_INODE_BITMAP(fs,nod) \
-	( GRP_GET_GROUP_IBM((fs),GRP_GROUP_OF_INODE((fs),(nod))) )
+#define GRP_GET_INODE_BITMAP(fs,nod,bi,gi)				\
+	( GRP_GET_GROUP_IBM((fs),get_gd(fs,GRP_GROUP_OF_INODE((fs),(nod)),gi),bi) )
+#define GRP_PUT_INODE_BITMAP(bi,gi)		\
+	( GRP_PUT_GROUP_IBM((bi)),put_gd((gi)) )
 
 //Given an inode number find its offset within the inode bitmap that covers it
 #define GRP_IBM_OFFSET(fs,nod) \
-	( (nod) - GRP_GROUP_OF_INODE((fs),(nod))*(fs)->sb.s_inodes_per_group )
+	( (nod) - GRP_GROUP_OF_INODE((fs),(nod))*(fs)->sb->s_inodes_per_group )
 
 // Given a block number find the group it belongs to
-#define GRP_GROUP_OF_BLOCK(fs,blk) ( ((blk)-1) / (fs)->sb.s_blocks_per_group)
+#define GRP_GROUP_OF_BLOCK(fs,blk) ( ((blk)-1) / (fs)->sb->s_blocks_per_group)
 	
-//Given a block number get the block bitmap that covers it
-#define GRP_GET_BLOCK_BITMAP(fs,blk) \
-	( GRP_GET_GROUP_BBM((fs),GRP_GROUP_OF_BLOCK((fs),(blk))) )
+//Given a block number get/put the block bitmap that covers it
+#define GRP_GET_BLOCK_BITMAP(fs,blk,bi,gi)				\
+	( GRP_GET_GROUP_BBM((fs),get_gd(fs,GRP_GROUP_OF_BLOCK((fs),(blk)),(gi)),(bi)) )
+#define GRP_PUT_BLOCK_BITMAP(bi,gi)		\
+	( GRP_PUT_GROUP_BBM((bi)),put_gd((gi)) )
 
 //Given a block number find its offset within the block bitmap that covers it
 #define GRP_BBM_OFFSET(fs,blk) \
-	( (blk) - GRP_GROUP_OF_BLOCK((fs),(blk))*(fs)->sb.s_blocks_per_group )
+	( (blk) - GRP_GROUP_OF_BLOCK((fs),(blk))*(fs)->sb->s_blocks_per_group )
 
 
 // used types
@@ -286,7 +331,9 @@
 // older solaris. Note that this is still not very portable, in that
 // the return value cannot be trusted.
 
-#if SCANF_CAN_MALLOC
+#if 0 // SCANF_CAN_MALLOC
+// C99 defines "a" as a floating-point conversion, so you can get runtime
+// surprises depending on the library version
 # define SCANF_PREFIX "a"
 # define SCANF_STRING(s) (&s)
 #else
@@ -430,6 +477,17 @@
 			((val<<8)&0xFF0000) | (val<<24));
 }
 
+static inline int
+is_blk_empty(uint8 *b)
+{
+	uint32 i;
+	uint32 *v = (uint32 *) b;
+
+	for(i = 0; i < BLOCKSIZE / 4; i++)
+		if (*v++)
+			return 0;
+	return 1;
+}
 
 // on-disk structures
 // this trick makes me declare things only once
@@ -460,7 +518,22 @@
 	udecl32(s_creator_os)          /* Indicator of which OS created the filesystem */ \
 	udecl32(s_rev_level)           /* The revision level of the filesystem */ \
 	udecl16(s_def_resuid)          /* The default uid for reserved blocks */ \
-	udecl16(s_def_resgid)          /* The default gid for reserved blocks */
+	udecl16(s_def_resgid)          /* The default gid for reserved blocks */ \
+	/* rev 1 version fields start here */ \
+	udecl32(s_first_ino) 		/* First non-reserved inode */	\
+	udecl16(s_inode_size) 		/* size of inode structure */	\
+	udecl16(s_block_group_nr) 	/* block group # of this superblock */ \
+	udecl32(s_feature_compat) 	/* compatible feature set */	\
+	udecl32(s_feature_incompat) 	/* incompatible feature set */	\
+	udecl32(s_feature_ro_compat) 	/* readonly-compatible feature set */ \
+	utdecl8(s_uuid,16)		/* 128-bit uuid for volume */	\
+	utdecl8(s_volume_name,16) 	/* volume name */		\
+	utdecl8(s_last_mounted,64) 	/* directory where last mounted */ \
+	udecl32(s_algorithm_usage_bitmap) /* For compression */
+
+#define EXT2_GOOD_OLD_FIRST_INO	11
+#define EXT2_GOOD_OLD_INODE_SIZE 128
+#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
 
 #define groupdescriptor_decl \
 	udecl32(bg_block_bitmap)       /* Block number of the block bitmap */ \
@@ -500,6 +573,7 @@
 
 #define decl8(x) int8 x;
 #define udecl8(x) uint8 x;
+#define utdecl8(x,n) uint8 x[n];
 #define decl16(x) int16 x;
 #define udecl16(x) uint16 x;
 #define decl32(x) int32 x;
@@ -509,7 +583,7 @@
 typedef struct
 {
 	superblock_decl
-	uint32 s_reserved[235];       // Reserved
+	uint32 s_reserved[205];       // Reserved
 } superblock;
 
 typedef struct
@@ -527,10 +601,9 @@
 typedef struct
 {
 	directory_decl
-	char d_name[0];
 } directory;
 
-typedef uint8 block[BLOCKSIZE];
+typedef uint8 *block;
 
 /* blockwalker fields:
    The blockwalker is used to access all the blocks of a file (including
@@ -567,23 +640,41 @@
 	uint32 bptind;
 } blockwalker;
 
+#define HDLINK_CNT   16
+struct hdlink_s
+{
+	uint32	src_inode;
+	uint32	dst_nod;
+};
+
+struct hdlinks_s
+{
+	int32 count;
+	struct hdlink_s *hdl;
+};
 
 /* Filesystem structure that support groups */
-#if BLOCKSIZE == 1024
 typedef struct
 {
-	block zero;            // The famous block 0
-	superblock sb;         // The superblock
-	groupdescriptor gd[0]; // The group descriptors
+	FILE *f;
+	superblock *sb;
+	int swapit;
+	int32 hdlink_cnt;
+	struct hdlinks_s hdlinks;
+
+	int holes;
+
+	listcache blks;
+	listcache gds;
+	listcache inodes;
+	listcache blkmaps;
 } filesystem;
-#else
-#error UNHANDLED BLOCKSIZE
-#endif
 
 // now the endianness swap
 
 #undef decl8
 #undef udecl8
+#undef utdecl8
 #undef decl16
 #undef udecl16
 #undef decl32
@@ -592,28 +683,13 @@
 
 #define decl8(x)
 #define udecl8(x)
+#define utdecl8(x,n)
 #define decl16(x) this->x = swab16(this->x);
 #define udecl16(x) this->x = swab16(this->x);
 #define decl32(x) this->x = swab32(this->x);
 #define udecl32(x) this->x = swab32(this->x);
 #define utdecl32(x,n) { int i; for(i=0; i<n; i++) this->x[i] = swab32(this->x[i]); }
 
-#define HDLINK_CNT   16
-static int32 hdlink_cnt = HDLINK_CNT;
-struct hdlink_s
-{
-	uint32	src_inode;
-	uint32	dst_nod;
-};
-
-struct hdlinks_s 
-{
-	int32 count;
-	struct hdlink_s *hdl;
-};
-
-static struct hdlinks_s hdlinks;
-
 static void
 swap_sb(superblock *sb)
 {
@@ -633,9 +709,24 @@
 static void
 swap_nod(inode *nod)
 {
+	uint32 nblk;
+
 #define this nod
 	inode_decl
 #undef this
+
+	// block and character inodes store the major and minor in the
+	// i_block, so we need to unswap to get those.  Also, if the inode has
+	// a size but zero blocks, put the i_block data back like it belongs.
+	nblk = nod->i_blocks / INOBLK;
+	if ((nod->i_size && !nblk)
+	    || ((nod->i_mode & FM_IFBLK) == FM_IFBLK)
+	    || ((nod->i_mode & FM_IFCHR) == FM_IFCHR))
+	{
+		int i;
+		for(i = 0; i <= EXT2_TIND_BLOCK; i++)
+			nod->i_block[i] = swab32(nod->i_block[i]);
+	}
 }
 
 static void
@@ -657,6 +748,7 @@
 
 #undef decl8
 #undef udecl8
+#undef utdecl8
 #undef decl16
 #undef udecl16
 #undef decl32
@@ -770,15 +862,15 @@
 }
 
 int
-is_hardlink(ino_t inode)
+is_hardlink(filesystem *fs, ino_t inode)
 {
 	int i;
 
-	for(i = 0; i < hdlinks.count; i++) {
-		if(hdlinks.hdl[i].src_inode == inode)
+	for(i = 0; i < fs->hdlinks.count; i++) {
+		if(fs->hdlinks.hdl[i].src_inode == inode)
 			return i;
 	}
-	return -1;		
+	return -1;
 }
 
 // printf helper macro
@@ -789,6 +881,8 @@
 get_workblk(void)
 {
 	unsigned char* b=calloc(1,BLOCKSIZE);
+	if (!b)
+		error_msg_and_die("get_workblk() failed, out of memory");
 	return b;
 }
 static inline void
@@ -811,24 +905,464 @@
 	return b[(item-1) / 8] & (1 << ((item-1) % 8));
 }
 
-// return a given block from a filesystem
+// Used by get_blk/put_blk to hold information about a block owned
+// by the user.
+typedef struct
+{
+	cache_link link;
+
+	filesystem *fs;
+	uint32 blk;
+	uint8 *b;
+	uint32 usecount;
+} blk_info;
+
+#define MAX_FREE_CACHE_BLOCKS 100
+
+static uint32
+blk_elem_val(cache_link *elem)
+{
+	blk_info *bi = container_of(elem, blk_info, link);
+	return bi->blk;
+}
+
+static void
+blk_freed(cache_link *elem)
+{
+	blk_info *bi = container_of(elem, blk_info, link);
+
+	if (fseeko(bi->fs->f, ((off_t) bi->blk) * BLOCKSIZE, SEEK_SET))
+		perror_msg_and_die("fseek");
+	if (fwrite(bi->b, BLOCKSIZE, 1, bi->fs->f) != 1)
+		perror_msg_and_die("get_blk: write");
+	free(bi->b);
+	free(bi);
+}
+
+// Return a given block from a filesystem.  Make sure to call
+// put_blk when you are done with it.
 static inline uint8 *
-get_blk(filesystem *fs, uint32 blk)
+get_blk(filesystem *fs, uint32 blk, blk_info **rbi)
 {
-	return (uint8*)fs + blk*BLOCKSIZE;
+	cache_link *curr;
+	blk_info *bi;
+
+	if (blk >= fs->sb->s_blocks_count)
+		error_msg_and_die("Internal error, block out of range");
+
+	curr = cache_find(&fs->blks, blk);
+	if (curr) {
+		bi = container_of(curr, blk_info, link);
+		bi->usecount++;
+		goto out;
+	}
+
+	bi = malloc(sizeof(*bi));
+	if (!bi)
+		error_msg_and_die("get_blk: out of memory");
+	bi->fs = fs;
+	bi->blk = blk;
+	bi->usecount = 1;
+	bi->b = malloc(BLOCKSIZE);
+	if (!bi->b)
+		error_msg_and_die("get_blk: out of memory");
+	cache_add(&fs->blks, &bi->link);
+	if (fseeko(fs->f, ((off_t) blk) * BLOCKSIZE, SEEK_SET))
+		perror_msg_and_die("fseek");
+	if (fread(bi->b, BLOCKSIZE, 1, fs->f) != 1) {
+		if (ferror(fs->f))
+			perror_msg_and_die("fread");
+		memset(bi->b, 0, BLOCKSIZE);
+	}
+
+out:
+	*rbi = bi;
+	return bi->b;
 }
 
 // return a given inode from a filesystem
-static inline inode *
-get_nod(filesystem *fs, uint32 nod)
+static inline void
+put_blk(blk_info *bi)
+{
+	if (bi->usecount == 0)
+		error_msg_and_die("Internal error: put_blk usecount zero");
+	bi->usecount--;
+	if (bi->usecount == 0)
+		/* Free happens in the cache code */
+		cache_item_set_unused(&bi->fs->blks, &bi->link);
+}
+
+typedef struct
 {
-	int grp,offset;
+	cache_link link;
+
+	filesystem *fs;
+	int gds;
+	blk_info *bi;
+	groupdescriptor *gd;
+	uint32 usecount;
+} gd_info;
+
+#define MAX_FREE_CACHE_GDS 100
+
+static uint32
+gd_elem_val(cache_link *elem)
+{
+	gd_info *gi = container_of(elem, gd_info, link);
+	return gi->gds;
+}
+
+static void
+gd_freed(cache_link *elem)
+{
+	gd_info *gi = container_of(elem, gd_info, link);
+
+	if (gi->fs->swapit)
+		swap_gd(gi->gd);
+	put_blk(gi->bi);
+	free(gi);
+}
+
+#define GDS_START ((SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE + BLOCKSIZE - 1) / BLOCKSIZE)
+#define GDS_PER_BLOCK (BLOCKSIZE / sizeof(groupdescriptor))
+// the group descriptors are aligned on the block size
+static inline groupdescriptor *
+get_gd(filesystem *fs, uint32 no, gd_info **rgi)
+{
+	uint32 gdblk;
+	uint32 offset;
+	gd_info *gi;
+	cache_link *curr;
+
+	curr = cache_find(&fs->gds, no);
+	if (curr) {
+		gi = container_of(curr, gd_info, link);
+		gi->usecount++;
+		goto out;
+	}
+
+	gi = malloc(sizeof(*gi));
+	if (!gi)
+		error_msg_and_die("get_gd: out of memory");
+	gi->fs = fs;
+	gi->gds = no;
+	gi->usecount = 1;
+	gdblk = GDS_START + (no / GDS_PER_BLOCK);
+	offset = no % GDS_PER_BLOCK;
+	gi->gd = ((groupdescriptor *) get_blk(fs, gdblk, &gi->bi)) + offset;
+	cache_add(&fs->gds, &gi->link);
+	if (fs->swapit)
+		swap_gd(gi->gd);
+ out:
+	*rgi = gi;
+
+	return gi->gd;
+}
+
+static inline void
+put_gd(gd_info *gi)
+{
+	if (gi->usecount == 0)
+		error_msg_and_die("Internal error: put_gd usecount zero");
+
+	gi->usecount--;
+	if (gi->usecount == 0)
+		/* Free happens in the cache code */
+		cache_item_set_unused(&gi->fs->gds, &gi->link);
+}
+
+// Used by get_blkmap/put_blkmap to hold information about a block map
+// owned by the user.
+typedef struct
+{
+	cache_link link;
+
+	filesystem *fs;
+	uint32 blk;
+	uint8 *b;
+	blk_info *bi;
+	uint32 usecount;
+} blkmap_info;
+
+#define MAX_FREE_CACHE_BLOCKMAPS 100
+
+static uint32
+blkmap_elem_val(cache_link *elem)
+{
+	blkmap_info *bmi = container_of(elem, blkmap_info, link);
+	return bmi->blk;
+}
+
+static void
+blkmap_freed(cache_link *elem)
+{
+	blkmap_info *bmi = container_of(elem, blkmap_info, link);
+
+	if (bmi->fs->swapit)
+		swap_block(bmi->b);
+	put_blk(bmi->bi);
+	free(bmi);
+}
+
+// Return a given block map from a filesystem.  Make sure to call
+// put_blkmap when you are done with it.
+static inline uint32 *
+get_blkmap(filesystem *fs, uint32 blk, blkmap_info **rbmi)
+{
+	blkmap_info *bmi;
+	cache_link *curr;
+
+	curr = cache_find(&fs->blkmaps, blk);
+	if (curr) {
+		bmi = container_of(curr, blkmap_info, link);
+		bmi->usecount++;
+		goto out;
+	}
+
+	bmi = malloc(sizeof(*bmi));
+	if (!bmi)
+		error_msg_and_die("get_blkmap: out of memory");
+	bmi->fs = fs;
+	bmi->blk = blk;
+	bmi->b = get_blk(fs, blk, &bmi->bi);
+	bmi->usecount = 1;
+	cache_add(&fs->blkmaps, &bmi->link);
+
+	if (fs->swapit)
+		swap_block(bmi->b);
+ out:
+	*rbmi = bmi;
+	return (uint32 *) bmi->b;
+}
+
+static inline void
+put_blkmap(blkmap_info *bmi)
+{
+	if (bmi->usecount == 0)
+		error_msg_and_die("Internal error: put_blkmap usecount zero");
+
+	bmi->usecount--;
+	if (bmi->usecount == 0)
+		/* Free happens in the cache code */
+		cache_item_set_unused(&bmi->fs->blkmaps, &bmi->link);
+}
+
+// Used by get_nod/put_nod to hold information about an inode owned
+// by the user.
+typedef struct
+{
+	cache_link link;
+
+	filesystem *fs;
+	uint32 nod;
+	uint8 *b;
+	blk_info *bi;
 	inode *itab;
+	uint32 usecount;
+} nod_info;
+
+#define MAX_FREE_CACHE_INODES 100
+
+static uint32
+inode_elem_val(cache_link *elem)
+{
+	nod_info *ni = container_of(elem, nod_info, link);
+	return ni->nod;
+}
+
+static void
+inode_freed(cache_link *elem)
+{
+	nod_info *ni = container_of(elem, nod_info, link);
+
+	if (ni->fs->swapit)
+		swap_nod(ni->itab);
+	put_blk(ni->bi);
+	free(ni);
+}
+
+#define INODES_PER_BLOCK (BLOCKSIZE / sizeof(inode))
 
-	offset = GRP_IBM_OFFSET(fs,nod);
+// return a given inode from a filesystem
+static inline inode *
+get_nod(filesystem *fs, uint32 nod, nod_info **rni)
+{
+	uint32 grp, boffset, offset;
+	cache_link *curr;
+	groupdescriptor *gd;
+	gd_info *gi;
+	nod_info *ni;
+
+	curr = cache_find(&fs->inodes, nod);
+	if (curr) {
+		ni = container_of(curr, nod_info, link);
+		ni->usecount++;
+		goto out;
+	}
+
+	ni = malloc(sizeof(*ni));
+	if (!ni)
+		error_msg_and_die("get_nod: out of memory");
+	ni->fs = fs;
+	ni->nod = nod;
+	ni->usecount = 1;
+	cache_add(&fs->inodes, &ni->link);
+
+	offset = GRP_IBM_OFFSET(fs,nod) - 1;
+	boffset = offset / INODES_PER_BLOCK;
+	offset %= INODES_PER_BLOCK;
 	grp = GRP_GROUP_OF_INODE(fs,nod);
-	itab = (inode *)get_blk(fs, fs->gd[grp].bg_inode_table);
-	return itab+offset-1;
+	gd = get_gd(fs, grp, &gi);
+	ni->b = get_blk(fs, gd->bg_inode_table + boffset, &ni->bi);
+	ni->itab = ((inode *) ni->b) + offset;
+	if (fs->swapit)
+		swap_nod(ni->itab);
+	put_gd(gi);
+ out:
+	*rni = ni;
+	return ni->itab;
+}
+
+static inline void
+put_nod(nod_info *ni)
+{
+	if (ni->usecount == 0)
+		error_msg_and_die("Internal error: put_nod usecount zero");
+
+	ni->usecount--;
+	if (ni->usecount == 0)
+		/* Free happens in the cache code */
+		cache_item_set_unused(&ni->fs->inodes, &ni->link);
+}
+
+// Used to hold state information while walking a directory inode.
+typedef struct
+{
+	directory d;
+	filesystem *fs;
+	uint32 nod;
+	directory *last_d;
+	uint8 *b;
+	blk_info *bi;
+} dirwalker;
+
+// Start a directory walk on the given inode.  You must pass in a
+// dirwalker structure, then use that dirwalker for future operations.
+// Call put_dir when you are done walking the directory.
+static inline directory *
+get_dir(filesystem *fs, uint32 nod, dirwalker *dw)
+{
+	dw->fs = fs;
+	dw->b = get_blk(fs, nod, &dw->bi);
+	dw->nod = nod;
+	dw->last_d = (directory *) dw->b;
+
+	memcpy(&dw->d, dw->last_d, sizeof(directory));
+	if (fs->swapit)
+		swap_dir(&dw->d);
+	return &dw->d;
+}
+
+// Move to the next directory.
+static inline directory *
+next_dir(dirwalker *dw)
+{
+	directory *next_d = (directory *)((int8*)dw->last_d + dw->d.d_rec_len);
+
+	if (dw->fs->swapit)
+		swap_dir(&dw->d);
+	memcpy(dw->last_d, &dw->d, sizeof(directory));
+
+	if (((int8 *) next_d) >= ((int8 *) dw->b + BLOCKSIZE))
+		return NULL;
+
+	dw->last_d = next_d;
+	memcpy(&dw->d, next_d, sizeof(directory));
+	if (dw->fs->swapit)
+		swap_dir(&dw->d);
+	return &dw->d;
+}
+
+// Call this when you are done with the directory walk.
+static inline void
+put_dir(dirwalker *dw)
+{
+	if (dw->fs->swapit)
+		swap_dir(&dw->d);
+	memcpy(dw->last_d, &dw->d, sizeof(directory));
+
+	if (dw->nod == 0)
+		free_workblk(dw->b);
+	else
+		put_blk(dw->bi);
+}
+
+// Create a new directory block with the given inode as its destination
+// and append it to the current dirwalker.
+static directory *
+new_dir(filesystem *fs, uint32 dnod, const char *name, int nlen, dirwalker *dw)
+{
+	directory *d;
+
+	dw->fs = fs;
+	dw->b = get_workblk();
+	dw->nod = 0;
+	dw->last_d = (directory *) dw->b;
+	d = &dw->d;
+	d->d_inode = dnod;
+	d->d_rec_len = BLOCKSIZE;
+	d->d_name_len = nlen;
+	strncpy(((char *) dw->last_d) + sizeof(directory), name, nlen);
+	return d;
+}
+
+// Shrink the current directory entry, make a new one with the free
+// space, and return the new directory entry (making it current).
+static inline directory *
+shrink_dir(dirwalker *dw, uint32 nod, const char *name, int nlen)
+{
+	int reclen, preclen;
+	directory *d = &dw->d;
+
+	reclen = d->d_rec_len;
+	d->d_rec_len = sizeof(directory) + rndup(d->d_name_len, 4);
+	preclen = d->d_rec_len;
+	reclen -= preclen;
+	if (dw->fs->swapit)
+		swap_dir(&dw->d);
+	memcpy(dw->last_d, &dw->d, sizeof(directory));
+
+	dw->last_d = (directory *) (((int8 *) dw->last_d) + preclen);
+	d->d_rec_len = reclen;
+	d->d_inode = nod;
+	d->d_name_len = nlen;
+	strncpy(((char *) dw->last_d) + sizeof(directory), name, nlen);
+
+	return d;
+}
+
+// Return the current block the directory is walking
+static inline uint8 *
+dir_data(dirwalker *dw)
+{
+	return dw->b;
+}
+
+// Return the pointer to the name for the current directory
+static inline char *
+dir_name(dirwalker *dw)
+{
+	return ((char *) dw->last_d) + sizeof(directory);
+}
+
+// Set the name for the current directory.  Note that this doesn't
+// verify that there is space for the directory name; you must do
+// that yourself.
+static void
+dir_set_name(dirwalker *dw, const char *name, int nlen)
+{
+	dw->d.d_name_len = nlen;
+	strncpy(((char *) dw->last_d) + sizeof(directory), name, nlen);
 }
 
 // allocate a given block/inode in the bitmap
@@ -870,21 +1404,34 @@
 {
 	uint32 bk=0;
 	uint32 grp,nbgroups;
+	blk_info *bi;
+	groupdescriptor *gd;
+	gd_info *gi;
 
 	grp = GRP_GROUP_OF_INODE(fs,nod);
 	nbgroups = GRP_NBGROUPS(fs);
-	if(!(bk = allocate(get_blk(fs,fs->gd[grp].bg_block_bitmap), 0))) {
-		for(grp=0;grp<nbgroups && !bk;grp++)
-			bk=allocate(get_blk(fs,fs->gd[grp].bg_block_bitmap),0);
+	gd = get_gd(fs, grp, &gi);
+	bk = allocate(GRP_GET_GROUP_BBM(fs, gd, &bi), 0);
+	GRP_PUT_GROUP_BBM(bi);
+	put_gd(gi);
+	if (!bk) {
+		for (grp=0; grp<nbgroups && !bk; grp++) {
+			gd = get_gd(fs, grp, &gi);
+			bk = allocate(GRP_GET_GROUP_BBM(fs, gd, &bi), 0);
+			GRP_PUT_GROUP_BBM(bi);
+			put_gd(gi);
+		}
 		grp--;
 	}
 	if (!bk)
 		error_msg_and_die("couldn't allocate a block (no free space)");
-	if(!(fs->gd[grp].bg_free_blocks_count--))
+	gd = get_gd(fs, grp, &gi);
+	if(!(gd->bg_free_blocks_count--))
 		error_msg_and_die("group descr %d. free blocks count == 0 (corrupted fs?)",grp);
-	if(!(fs->sb.s_free_blocks_count--))
+	put_gd(gi);
+	if(!(fs->sb->s_free_blocks_count--))
 		error_msg_and_die("superblock free blocks count == 0 (corrupted fs?)");
-	return fs->sb.s_blocks_per_group*grp + bk;
+	return fs->sb->s_first_data_block + fs->sb->s_blocks_per_group*grp + (bk-1);
 }
 
 // free a block
@@ -892,12 +1439,18 @@
 free_blk(filesystem *fs, uint32 bk)
 {
 	uint32 grp;
-
-	grp = bk / fs->sb.s_blocks_per_group;
-	bk %= fs->sb.s_blocks_per_group;
-	deallocate(get_blk(fs,fs->gd[grp].bg_block_bitmap), bk);
-	fs->gd[grp].bg_free_blocks_count++;
-	fs->sb.s_free_blocks_count++;
+	blk_info *bi;
+	gd_info *gi;
+	groupdescriptor *gd;
+
+	grp = bk / fs->sb->s_blocks_per_group;
+	bk %= fs->sb->s_blocks_per_group;
+	gd = get_gd(fs, grp, &gi);
+	deallocate(GRP_GET_GROUP_BBM(fs, gd, &bi), bk);
+	GRP_PUT_GROUP_BBM(bi);
+	gd->bg_free_blocks_count++;
+	put_gd(gi);
+	fs->sb->s_free_blocks_count++;
 }
 
 // allocate an inode
@@ -906,6 +1459,9 @@
 {
 	uint32 nod,best_group=0;
 	uint32 grp,nbgroups,avefreei;
+	blk_info *bi;
+	gd_info *gi, *bestgi;
+	groupdescriptor *gd, *bestgd;
 
 	nbgroups = GRP_NBGROUPS(fs);
 
@@ -914,22 +1470,32 @@
 	/* find the one with the most free blocks and allocate node there     */
 	/* Idea from find_group_dir in fs/ext2/ialloc.c in 2.4.19 kernel      */
 	/* We do it for all inodes.                                           */
-	avefreei  =  fs->sb.s_free_inodes_count / nbgroups;
+	avefreei  =  fs->sb->s_free_inodes_count / nbgroups;
+	bestgd = get_gd(fs, best_group, &bestgi);
 	for(grp=0; grp<nbgroups; grp++) {
-		if (fs->gd[grp].bg_free_inodes_count < avefreei ||
-		    fs->gd[grp].bg_free_inodes_count == 0)
+		gd = get_gd(fs, grp, &gi);
+		if (gd->bg_free_inodes_count < avefreei ||
+		    gd->bg_free_inodes_count == 0) {
+			put_gd(gi);
 			continue;
-		if (!best_group || 
-			fs->gd[grp].bg_free_blocks_count > fs->gd[best_group].bg_free_blocks_count)
+		}
+		if (!best_group || gd->bg_free_blocks_count > bestgd->bg_free_blocks_count) {
+			put_gd(bestgi);
 			best_group = grp;
+			bestgd = gd;
+			bestgi = gi;
+		} else
+			put_gd(gi);
 	}
-	if (!(nod = allocate(get_blk(fs,fs->gd[best_group].bg_inode_bitmap),0)))
+	if (!(nod = allocate(GRP_GET_GROUP_IBM(fs, bestgd, &bi), 0)))
 		error_msg_and_die("couldn't allocate an inode (no free inode)");
-	if(!(fs->gd[best_group].bg_free_inodes_count--))
+	GRP_PUT_GROUP_IBM(bi);
+	if(!(bestgd->bg_free_inodes_count--))
 		error_msg_and_die("group descr. free blocks count == 0 (corrupted fs?)");
-	if(!(fs->sb.s_free_inodes_count--))
+	put_gd(bestgi);
+	if(!(fs->sb->s_free_inodes_count--))
 		error_msg_and_die("superblock free blocks count == 0 (corrupted fs?)");
-	return fs->sb.s_inodes_per_group*best_group+nod;
+	return fs->sb->s_inodes_per_group*best_group+nod;
 }
 
 // print a bitmap allocation
@@ -962,30 +1528,40 @@
 //				  used after being freed, so once you start
 //				  freeing blocks don't stop until the end of
 //				  the file. moreover, i_blocks isn't updated.
-//				  in fact, don't do that, just use extend_blk
 // if hole!=0, create a hole in the file
 static uint32
 walk_bw(filesystem *fs, uint32 nod, blockwalker *bw, int32 *create, uint32 hole)
 {
 	uint32 *bkref = 0;
+	uint32 bk = 0;
+	blkmap_info *bmi1 = NULL, *bmi2 = NULL, *bmi3 = NULL;
 	uint32 *b;
 	int extend = 0, reduce = 0;
+	inode *inod;
+	nod_info *ni;
+	uint32 *iblk;
+
 	if(create && (*create) < 0)
 		reduce = 1;
-	if(bw->bnum >= get_nod(fs, nod)->i_blocks / INOBLK)
+	inod = get_nod(fs, nod, &ni);
+	if(bw->bnum >= inod->i_blocks / INOBLK)
 	{
 		if(create && (*create) > 0)
 		{
 			(*create)--;
 			extend = 1;
 		}
-		else	
+		else
+		{
+			put_nod(ni);
 			return WALK_END;
+		}
 	}
+	iblk = inod->i_block;
 	// first direct block
 	if(bw->bpdir == EXT2_INIT_BLOCK)
 	{
-		bkref = &get_nod(fs, nod)->i_block[bw->bpdir = 0];
+		bkref = &iblk[bw->bpdir = 0];
 		if(extend) // allocate first block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
 		if(reduce) // free first block
@@ -994,7 +1570,7 @@
 	// direct block
 	else if(bw->bpdir < EXT2_NDIR_BLOCKS)
 	{
-		bkref = &get_nod(fs, nod)->i_block[++bw->bpdir];
+		bkref = &iblk[++bw->bpdir];
 		if(extend) // allocate block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
 		if(reduce) // free block
@@ -1007,10 +1583,10 @@
 		bw->bpdir = EXT2_IND_BLOCK;
 		bw->bpind = 0;
 		if(extend) // allocate indirect block
-			get_nod(fs, nod)->i_block[bw->bpdir] = alloc_blk(fs,nod);
+			iblk[bw->bpdir] = alloc_blk(fs,nod);
 		if(reduce) // free indirect block
-			free_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
+			free_blk(fs, iblk[bw->bpdir]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
 		bkref = &b[bw->bpind];
 		if(extend) // allocate first block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1021,7 +1597,7 @@
 	else if((bw->bpdir == EXT2_IND_BLOCK) && (bw->bpind < BLOCKSIZE/4 - 1))
 	{
 		bw->bpind++;
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
 		bkref = &b[bw->bpind];
 		if(extend) // allocate block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1036,15 +1612,15 @@
 		bw->bpind = 0;
 		bw->bpdind = 0;
 		if(extend) // allocate double indirect block
-			get_nod(fs, nod)->i_block[bw->bpdir] = alloc_blk(fs,nod);
+			iblk[bw->bpdir] = alloc_blk(fs,nod);
 		if(reduce) // free double indirect block
-			free_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
+			free_blk(fs, iblk[bw->bpdir]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
 		if(extend) // allocate first indirect block
 			b[bw->bpind] = alloc_blk(fs,nod);
 		if(reduce) // free  firstindirect block
 			free_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpind]);
+		b = get_blkmap(fs, b[bw->bpind], &bmi2);
 		bkref = &b[bw->bpdind];
 		if(extend) // allocate first block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1055,8 +1631,8 @@
 	else if((bw->bpdir == EXT2_DIND_BLOCK) && (bw->bpdind < BLOCKSIZE/4 - 1))
 	{
 		bw->bpdind++;
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
-		b = (uint32*)get_blk(fs, b[bw->bpind]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
+		b = get_blkmap(fs, b[bw->bpind], &bmi2);
 		bkref = &b[bw->bpdind];
 		if(extend) // allocate block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1069,12 +1645,12 @@
 		bw->bnum++;
 		bw->bpdind = 0;
 		bw->bpind++;
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
 		if(extend) // allocate indirect block
 			b[bw->bpind] = alloc_blk(fs,nod);
 		if(reduce) // free indirect block
 			free_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpind]);
+		b = get_blkmap(fs, b[bw->bpind], &bmi2);
 		bkref = &b[bw->bpdind];
 		if(extend) // allocate first block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1094,20 +1670,20 @@
 		bw->bpdind = 0;
 		bw->bptind = 0;
 		if(extend) // allocate triple indirect block
-			get_nod(fs, nod)->i_block[bw->bpdir] = alloc_blk(fs,nod);
+			iblk[bw->bpdir] = alloc_blk(fs,nod);
 		if(reduce) // free triple indirect block
-			free_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
+			free_blk(fs, iblk[bw->bpdir]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
 		if(extend) // allocate first double indirect block
 			b[bw->bpind] = alloc_blk(fs,nod);
 		if(reduce) // free first double indirect block
 			free_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpind]);
+		b = get_blkmap(fs, b[bw->bpind], &bmi2);
 		if(extend) // allocate first indirect block
 			b[bw->bpdind] = alloc_blk(fs,nod);
 		if(reduce) // free first indirect block
 			free_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpdind]);
+		b = get_blkmap(fs, b[bw->bpdind], &bmi3);
 		bkref = &b[bw->bptind];
 		if(extend) // allocate first data block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1121,9 +1697,9 @@
 		  (bw->bptind < BLOCKSIZE/4 -1) )
 	{
 		bw->bptind++;
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
-		b = (uint32*)get_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpdind]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
+		b = get_blkmap(fs, b[bw->bpind], &bmi2);
+		b = get_blkmap(fs, b[bw->bpdind], &bmi3);
 		bkref = &b[bw->bptind];
 		if(extend) // allocate data block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1140,13 +1716,13 @@
 		bw->bnum++;
 		bw->bptind = 0;
 		bw->bpdind++;
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
-		b = (uint32*)get_blk(fs, b[bw->bpind]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
+		b = get_blkmap(fs, b[bw->bpind], &bmi2);
 		if(extend) // allocate single indirect block
 			b[bw->bpdind] = alloc_blk(fs,nod);
 		if(reduce) // free indirect block
 			free_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpdind]);
+		b = get_blkmap(fs, b[bw->bpdind], &bmi3);
 		bkref = &b[bw->bptind];
 		if(extend) // allocate first data block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1163,17 +1739,17 @@
 		bw->bpdind = 0;
 		bw->bptind = 0;
 		bw->bpind++;
-		b = (uint32*)get_blk(fs, get_nod(fs, nod)->i_block[bw->bpdir]);
+		b = get_blkmap(fs, iblk[bw->bpdir], &bmi1);
 		if(extend) // allocate double indirect block
 			b[bw->bpind] = alloc_blk(fs,nod);
 		if(reduce) // free double indirect block
 			free_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpind]);
+		b = get_blkmap(fs, b[bw->bpind], &bmi2);
 		if(extend) // allocate single indirect block
 			b[bw->bpdind] = alloc_blk(fs,nod);
 		if(reduce) // free indirect block
 			free_blk(fs, b[bw->bpind]);
-		b = (uint32*)get_blk(fs, b[bw->bpdind]);
+		b = get_blkmap(fs, b[bw->bpdind], &bmi3);
 		bkref = &b[bw->bptind];
 		if(extend) // allocate first block
 			*bkref = hole ? 0 : alloc_blk(fs,nod);
@@ -1184,56 +1760,105 @@
 		error_msg_and_die("file too big !"); 
 	/* End change for walking triple indirection */
 
-	if(*bkref)
-	{
+	bk = *bkref;
+	if (bmi3)
+		put_blkmap(bmi3);
+	if (bmi2)
+		put_blkmap(bmi2);
+	if (bmi1)
+		put_blkmap(bmi1);
+
+	if(bk)
+	{
+		blk_info *bi;
+		gd_info *gi;
+		uint8 *block;
 		bw->bnum++;
-		if(!reduce && !allocated(GRP_GET_BLOCK_BITMAP(fs,*bkref), GRP_BBM_OFFSET(fs,*bkref)))
-			error_msg_and_die("[block %d of inode %d is unallocated !]", *bkref, nod);
+		block = GRP_GET_BLOCK_BITMAP(fs,bk,&bi,&gi);
+		if(!reduce && !allocated(block, GRP_BBM_OFFSET(fs,bk)))
+			error_msg_and_die("[block %d of inode %d is unallocated !]", bk, nod);
+		GRP_PUT_BLOCK_BITMAP(bi, gi);
 	}
 	if(extend)
-		get_nod(fs, nod)->i_blocks = bw->bnum * INOBLK;
-	return *bkref;
+		inod->i_blocks = bw->bnum * INOBLK;
+	put_nod(ni);
+	return bk;
 }
 
-// add blocks to an inode (file/dir/etc...)
-static void
-extend_blk(filesystem *fs, uint32 nod, block b, int amount)
+typedef struct
 {
-	int create = amount;
-	blockwalker bw, lbw;
-	uint32 bk;
-	init_bw(&bw);
-	if(amount < 0)
-	{
-		uint32 i;
-		for(i = 0; i < get_nod(fs, nod)->i_blocks / INOBLK + amount; i++)
-			walk_bw(fs, nod, &bw, 0, 0);
-		while(walk_bw(fs, nod, &bw, &create, 0) != WALK_END)
+	blockwalker bw;
+	uint32 nod;
+	nod_info *ni;
+	inode *inod;
+} inode_pos;
+#define INODE_POS_TRUNCATE 0
+#define INODE_POS_EXTEND 1
+
+// Set up ipos for appending blocks to inode 'nod' with extend_inode_blk.
+// If op is INODE_POS_TRUNCATE, the inode is first truncated: its blocks
+// are walked with a create count of -1 and i_blocks is reset to 0.
+// The append position is then copied from *endbw when the caller passes
+// one; with endbw == NULL it is found by walking to the last block.
+// Call inode_pos_finish when done with the inode_pos structure.
+static void
+inode_pos_init(filesystem *fs, inode_pos *ipos, uint32 nod, int op,
+	       blockwalker *endbw)
+{
+	blockwalker lbw;
+
+	init_bw(&ipos->bw);
+	ipos->nod = nod;
+	ipos->inod = get_nod(fs, nod, &ipos->ni);
+	if (op == INODE_POS_TRUNCATE) {
+		int32 create = -1;
+		while(walk_bw(fs, nod, &ipos->bw, &create, 0) != WALK_END)
 			/*nop*/;
-		get_nod(fs, nod)->i_blocks += amount * INOBLK;
+		ipos->inod->i_blocks = 0;
 	}
-	else
+
+	if (endbw)
+		ipos->bw = *endbw;
+	else {
+		/* Seek to the end: lbw trails one step behind so it keeps the last position before WALK_END */
+		init_bw(&ipos->bw);
+		lbw = ipos->bw;
+		while(walk_bw(fs, nod, &ipos->bw, 0, 0) != WALK_END)
+			lbw = ipos->bw;
+		ipos->bw = lbw;
+	}
+}
+
+// Clean up the inode_pos structure: puts back the inode reference (ipos->ni) taken by inode_pos_init; fs is not used.
+static void
+inode_pos_finish(filesystem *fs, inode_pos *ipos)
+{
+	put_nod(ipos->ni);
+}
+
+// Append 'amount' blocks, read consecutively from b, to an inode (file/dir/etc...) at the given position.
+// This will only work when appending to the end of an inode. Dies on a negative amount or if walk_bw returns WALK_END.
+static void
+extend_inode_blk(filesystem *fs, inode_pos *ipos, block b, int amount)
+{
+	uint32 bk;
+	uint32 pos;
+
+	if (amount < 0)
+		error_msg_and_die("extend_inode_blk: Got negative amount");
+
+	for (pos = 0; amount; pos += BLOCKSIZE) // amount is only modified through the &amount handed to walk_bw
 	{
-		lbw = bw;
-		while((bk = walk_bw(fs, nod, &bw, 0, 0)) != WALK_END)
-			lbw = bw;
-		bw = lbw;
-		while(create)
-		{
-			int i, copyb = 0;
-			if(!(fs->sb.s_reserved[200] & OP_HOLES))
-				copyb = 1;
-			else
-				for(i = 0; i < BLOCKSIZE / 4; i++)
-					if(((int32*)(b + BLOCKSIZE * (amount - create)))[i])
-					{
-						copyb = 1;
-						break;
-					}
-			if((bk = walk_bw(fs, nod, &bw, &create, !copyb)) == WALK_END)
-				break;
-			if(copyb)
-				memcpy(get_blk(fs, bk), b + BLOCKSIZE * (amount - create - 1), BLOCKSIZE);
+		int hole = (fs->holes && is_blk_empty(b + pos)); // with fs->holes set, an empty source block is passed to walk_bw as a hole and not copied
+
+		bk = walk_bw(fs, ipos->nod, &ipos->bw, &amount, hole);
+		if (bk == WALK_END)
+			error_msg_and_die("extend_inode_blk: extend failed");
+		if (!hole) {
+			blk_info *bi;
+			uint8 *block = get_blk(fs, bk, &bi);
+			memcpy(block, b + pos, BLOCKSIZE);
+			put_blk(bi);
 		}
 	}
 }
@@ -1242,15 +1867,17 @@
 static void
 add2dir(filesystem *fs, uint32 dnod, uint32 nod, const char* name)
 {
-	blockwalker bw;
+	blockwalker bw, lbw;
 	uint32 bk;
-	uint8 *b;
 	directory *d;
+	dirwalker dw;
 	int reclen, nlen;
 	inode *node;
 	inode *pnode;
+	nod_info *dni, *ni;
+	inode_pos ipos;
 
-	pnode = get_nod(fs, dnod);
+	pnode = get_nod(fs, dnod, &dni);
 	if((pnode->i_mode & FM_IFMT) != FM_IFDIR)
 		error_msg_and_die("can't add '%s' to a non-directory", name);
 	if(!*name)
@@ -1262,52 +1889,52 @@
 	if(reclen > BLOCKSIZE)
 		error_msg_and_die("bad name '%s' (too long)", name);
 	init_bw(&bw);
+	lbw = bw;
 	while((bk = walk_bw(fs, dnod, &bw, 0, 0)) != WALK_END) // for all blocks in dir
 	{
-		b = get_blk(fs, bk);
 		// for all dir entries in block
-		for(d = (directory*)b; (int8*)d + sizeof(*d) < (int8*)b + BLOCKSIZE; d = (directory*)((int8*)d + d->d_rec_len))
+		for(d = get_dir(fs, bk, &dw); d; d = next_dir(&dw))
 		{
 			// if empty dir entry, large enough, use it
 			if((!d->d_inode) && (d->d_rec_len >= reclen))
 			{
 				d->d_inode = nod;
-				node = get_nod(fs, nod);
+				node = get_nod(fs, nod, &ni);
+				dir_set_name(&dw, name, nlen);
+				put_dir(&dw);
 				node->i_links_count++;
-				d->d_name_len = nlen;
-				strncpy(d->d_name, name, nlen);
-				return;
+				put_nod(ni);
+				goto out;
 			}
 			// if entry with enough room (last one?), shrink it & use it
 			if(d->d_rec_len >= (sizeof(directory) + rndup(d->d_name_len, 4) + reclen))
 			{
-				reclen = d->d_rec_len;
-				d->d_rec_len = sizeof(directory) + rndup(d->d_name_len, 4);
-				reclen -= d->d_rec_len;
-				d = (directory*) (((int8*)d) + d->d_rec_len);
-				d->d_rec_len = reclen;
-				d->d_inode = nod;
-				node = get_nod(fs, nod);
+				d = shrink_dir(&dw, nod, name, nlen);
+				put_dir(&dw);
+				node = get_nod(fs, nod, &ni);
 				node->i_links_count++;
-				d->d_name_len = nlen;
-				strncpy(d->d_name, name, nlen);
-				return;
+				put_nod(ni);
+				goto out;
 			}
 		}
+		put_dir(&dw);
+		lbw = bw;
 	}
 	// we found no free entry in the directory, so we add a block
-	if(!(b = get_workblk()))
-		error_msg_and_die("get_workblk() failed.");
-	d = (directory*)b;
-	d->d_inode = nod;
-	node = get_nod(fs, nod);
+	node = get_nod(fs, nod, &ni);
+	d = new_dir(fs, nod, name, nlen, &dw);
 	node->i_links_count++;
-	d->d_rec_len = BLOCKSIZE;
-	d->d_name_len = nlen;
-	strncpy(d->d_name, name, nlen);
-	extend_blk(fs, dnod, b, 1);
-	get_nod(fs, dnod)->i_size += BLOCKSIZE;
-	free_workblk(b);
+	put_nod(ni);
+	next_dir(&dw); // Force the data into the buffer
+
+	inode_pos_init(fs, &ipos, dnod, INODE_POS_EXTEND, &lbw);
+	extend_inode_blk(fs, &ipos, dir_data(&dw), 1);
+	inode_pos_finish(fs, &ipos);
+
+	put_dir(&dw);
+	pnode->i_size += BLOCKSIZE;
+out:
+	put_nod(dni);
 }
 
 // find an entry in a directory
@@ -1321,11 +1948,13 @@
 	while((bk = walk_bw(fs, nod, &bw, 0, 0)) != WALK_END)
 	{
 		directory *d;
-		uint8 *b;
-		b = get_blk(fs, bk);
-		for(d = (directory*)b; (int8*)d + sizeof(*d) < (int8*)b + BLOCKSIZE; d = (directory*)((int8*)d + d->d_rec_len))
-			if(d->d_inode && (nlen == d->d_name_len) && !strncmp(d->d_name, name, nlen))
+		dirwalker dw;
+		for (d = get_dir(fs, bk, &dw); d; d=next_dir(&dw))
+			if(d->d_inode && (nlen == d->d_name_len) && !strncmp(dir_name(&dw), name, nlen)) {
+				put_dir(&dw);
 				return d->d_inode;
+			}
+		put_dir(&dw);
 	}
 	return 0;
 }
@@ -1356,47 +1985,55 @@
 	return nod;
 }
 
+// chmod/chown an existing inode: replaces the FM_IMASK bits of i_mode with those of 'mode' and sets i_uid/i_gid
+void
+chmod_fs(filesystem *fs, uint32 nod, uint16 mode, uint16 uid, uint16 gid)
+{
+	inode *node;
+	nod_info *ni;
+	node = get_nod(fs, nod, &ni);
+	node->i_mode = (node->i_mode & ~FM_IMASK) | (mode & FM_IMASK); // i_mode bits outside FM_IMASK are kept
+	node->i_uid = uid;
+	node->i_gid = gid;
+	put_nod(ni);
+}
+
 // create a simple inode
 static uint32
 mknod_fs(filesystem *fs, uint32 parent_nod, const char *name, uint16 mode, uint16 uid, uint16 gid, uint8 major, uint8 minor, uint32 ctime, uint32 mtime)
 {
 	uint32 nod;
 	inode *node;
-	if((nod = find_dir(fs, parent_nod, name)))
-	{
-		node = get_nod(fs, nod);
-		if((node->i_mode & FM_IFMT) != (mode & FM_IFMT))
-			error_msg_and_die("node '%s' already exists and isn't of the same type", name);
-		node->i_mode = mode;
-	}
-	else
+	nod_info *ni;
+	gd_info *gi;
+
+	nod = alloc_nod(fs); // always allocates a new inode; no lookup of an existing 'name' is done here
+	node = get_nod(fs, nod, &ni);
+	node->i_mode = mode;
+	add2dir(fs, parent_nod, nod, name);
+	switch(mode & FM_IFMT)
 	{
-		nod = alloc_nod(fs);
-		node = get_nod(fs, nod);
-		node->i_mode = mode;
-		add2dir(fs, parent_nod, nod, name);
-		switch(mode & FM_IFMT)
-		{
-			case FM_IFLNK:
-				mode = FM_IFLNK | FM_IRWXU | FM_IRWXG | FM_IRWXO;
-				break;
-			case FM_IFBLK:
-			case FM_IFCHR:
-				((uint8*)get_nod(fs, nod)->i_block)[0] = minor;
-				((uint8*)get_nod(fs, nod)->i_block)[1] = major;
-				break;
-			case FM_IFDIR:
-				add2dir(fs, nod, nod, ".");
-				add2dir(fs, nod, parent_nod, "..");
-				fs->gd[GRP_GROUP_OF_INODE(fs,nod)].bg_used_dirs_count++;
-				break;
-		}
+	case FM_IFLNK:
+		mode = FM_IFLNK | FM_IRWXU | FM_IRWXG | FM_IRWXO; // NOTE(review): i_mode was already stored above and 'mode' is not read again below
+		break;
+	case FM_IFBLK:
+	case FM_IFCHR:
+		((uint8*)node->i_block)[0] = minor; // device numbers go into the first two bytes of i_block
+		((uint8*)node->i_block)[1] = major;
+		break;
+	case FM_IFDIR:
+		add2dir(fs, nod, nod, ".");
+		add2dir(fs, nod, parent_nod, "..");
+		get_gd(fs,GRP_GROUP_OF_INODE(fs,nod),&gi)->bg_used_dirs_count++; // count the new directory in its group descriptor
+		put_gd(gi);
+		break;
 	}
 	node->i_uid = uid;
 	node->i_gid = gid;
 	node->i_atime = mtime;
 	node->i_ctime = ctime;
 	node->i_mtime = mtime;
+	put_nod(ni);
 	return nod;
 }
 
@@ -1413,33 +2050,73 @@
 mklink_fs(filesystem *fs, uint32 parent_nod, const char *name, size_t size, uint8 *b, uid_t uid, gid_t gid, uint32 ctime, uint32 mtime)
 {
 	uint32 nod = mknod_fs(fs, parent_nod, name, FM_IFLNK | FM_IRWXU | FM_IRWXG | FM_IRWXO, uid, gid, 0, 0, ctime, mtime);
-	extend_blk(fs, nod, 0, - (int)get_nod(fs, nod)->i_blocks / INOBLK);
-	get_nod(fs, nod)->i_size = size;
-	if(size <= 4 * (EXT2_TIND_BLOCK+1))
-	{
-		strncpy((char*)get_nod(fs, nod)->i_block, (char*)b, size);
+	nod_info *ni;
+	inode *node = get_nod(fs, nod, &ni);
+	inode_pos ipos;
+
+	inode_pos_init(fs, &ipos, nod, INODE_POS_TRUNCATE, NULL); // start from an empty inode positioned for appending
+	node->i_size = size;
+	if(size < 4 * (EXT2_TIND_BLOCK+1)) // short target: stored inline in i_block, leaving room for the NUL
+	{
+		strncpy((char*)node->i_block, (char*)b, size);
+		((char*)node->i_block)[size] = '\0'; // terminate right after the target; [size+1] skipped a byte and could land past i_block
+		inode_pos_finish(fs, &ipos);
+		put_nod(ni);
 		return nod;
 	}
-	extend_blk(fs, nod, b, rndup(size, BLOCKSIZE) / BLOCKSIZE);
+	extend_inode_blk(fs, &ipos, b, rndup(size, BLOCKSIZE) / BLOCKSIZE); // long target: stored in data blocks, size rounded up to whole blocks
+	inode_pos_finish(fs, &ipos);
+	put_nod(ni);
 	return nod;
 }
 
+static void
+fs_upgrade_rev1_largefile(filesystem *fs) // mark the superblock as revision 1, filling in the "good old" first-inode and inode-size values
+{
+	fs->sb->s_rev_level = 1;
+	fs->sb->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
+	fs->sb->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
+}
+
+#define COPY_BLOCKS 16 /* number of blocks mkfile_fs reads from the input per fread() */
+#define CB_SIZE (COPY_BLOCKS * BLOCKSIZE) /* size in bytes of the mkfile_fs staging buffer */
+
 // make a file from a FILE*
 static uint32
-mkfile_fs(filesystem *fs, uint32 parent_nod, const char *name, uint32 mode, size_t size, FILE *f, uid_t uid, gid_t gid, uint32 ctime, uint32 mtime)
+mkfile_fs(filesystem *fs, uint32 parent_nod, const char *name, uint32 mode, FILE *f, uid_t uid, gid_t gid, uint32 ctime, uint32 mtime)
 {
 	uint8 * b;
 	uint32 nod = mknod_fs(fs, parent_nod, name, mode|FM_IFREG, uid, gid, 0, 0, ctime, mtime);
-	extend_blk(fs, nod, 0, - (int)get_nod(fs, nod)->i_blocks / INOBLK);
-	get_nod(fs, nod)->i_size = size;
-	if (size) {
-		if(!(b = (uint8*)calloc(rndup(size, BLOCKSIZE), 1)))
-			error_msg_and_die("not enough mem to read file '%s'", name);
-		if(f)
-			fread(b, size, 1, f); // FIXME: ugly. use mmap() ...
-		extend_blk(fs, nod, b, rndup(size, BLOCKSIZE) / BLOCKSIZE);
-		free(b);
-	}
+	nod_info *ni;
+	inode *node = get_nod(fs, nod, &ni);
+	off_t size = 0; // total bytes read from f; becomes the inode size
+	size_t readbytes;
+	inode_pos ipos;
+	int fullsize;
+
+	b = malloc(CB_SIZE);
+	if (!b)
+		error_msg_and_die("mkfile_fs: out of memory");
+	inode_pos_init(fs, &ipos, nod, INODE_POS_TRUNCATE, NULL);
+	readbytes = fread(b, 1, CB_SIZE, f); // NOTE(review): f is used unconditionally, so callers must pass an open stream
+	while (readbytes) {
+		fullsize = rndup(readbytes, BLOCKSIZE);
+		// Fill to end of block with zeros.
+		memset(b + readbytes, 0, fullsize - readbytes);
+		extend_inode_blk(fs, &ipos, b, fullsize / BLOCKSIZE);
+		size += readbytes;
+		readbytes = fread(b, 1, CB_SIZE, f);
+	}
+	if (size > 0x7fffffff) { // size no longer fits in 31 bits: flag the filesystem as containing a large file
+		if (fs->sb->s_rev_level < 1)
+			fs_upgrade_rev1_largefile(fs);
+		fs->sb->s_feature_ro_compat |= EXT2_FEATURE_RO_COMPAT_LARGE_FILE;
+	}
+	node->i_dir_acl = size >> 32; // upper 32 bits of the size
+	node->i_size = size;
+	inode_pos_finish(fs, &ipos);
+	put_nod(ni);
+	free(b);
 	return nod;
 }
 
@@ -1591,13 +2268,24 @@
 				dname = malloc(len + 1);
 				for(i = start; i < count; i++)
 				{
+					uint32 oldnod;
 					SNPRINTF(dname, len, "%s%lu", name, i);
-					mknod_fs(fs, nod, dname, mode, uid, gid, major, minor + (i * increment - start), ctime, mtime);
+					oldnod = find_dir(fs, nod, dname);
+					if(oldnod)
+						chmod_fs(fs, oldnod, mode, uid, gid);
+					else
+						mknod_fs(fs, nod, dname, mode, uid, gid, major, minor + (i * increment - start), ctime, mtime);
 				}
 				free(dname);
 			}
 			else
-				mknod_fs(fs, nod, name, mode, uid, gid, major, minor, ctime, mtime);
+			{
+				uint32 oldnod = find_dir(fs, nod, name);
+				if(oldnod)
+					chmod_fs(fs, oldnod, mode, uid, gid);
+				else
+					mknod_fs(fs, nod, name, mode, uid, gid, major, minor, ctime, mtime);
+			}
 		}
 	}
 	if (line)
@@ -1643,6 +2331,10 @@
 			switch(st.st_mode & S_IFMT)
 			{
 				case S_IFLNK:
+					if((st.st_mode & S_IFMT) == S_IFREG || st.st_size >= 4 * (EXT2_TIND_BLOCK+1))
+						stats->nblocks += (st.st_size + BLOCKSIZE - 1) / BLOCKSIZE;
+					stats->ninodes++;
+					break;
 				case S_IFREG:
 					if((st.st_mode & S_IFMT) == S_IFREG || st.st_size > 4 * (EXT2_TIND_BLOCK+1))
 						stats->nblocks += (st.st_size + BLOCKSIZE - 1) / BLOCKSIZE;
@@ -1657,19 +2349,33 @@
 					if(chdir(dent->d_name) < 0)
 						perror_msg_and_die(dent->d_name);
 					add2fs_from_dir(fs, this_nod, squash_uids, squash_perms, fs_timestamp, stats);
-					chdir("..");
+					if (chdir("..") == -1)
+						perror_msg_and_die("..");
+
 					break;
 				default:
 					break;
 			}
 		else
 		{
+			if((nod = find_dir(fs, this_nod, name)))
+			{
+				error_msg("ignoring duplicate entry %s", name);
+				if(S_ISDIR(st.st_mode)) {
+					if(chdir(dent->d_name) < 0)
+						perror_msg_and_die(name);
+					add2fs_from_dir(fs, nod, squash_uids, squash_perms, fs_timestamp, stats);
+					if (chdir("..") == -1)
+						perror_msg_and_die("..");
+				}
+				continue;
+			}
 			save_nod = 0;
 			/* Check for hardlinks */
 			if (!S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode) && st.st_nlink > 1) {
-				int32 hdlink = is_hardlink(st.st_ino);
+				int32 hdlink = is_hardlink(fs, st.st_ino);
 				if (hdlink >= 0) {
-					add2dir(fs, this_nod, hdlinks.hdl[hdlink].dst_nod, name);
+					add2dir(fs, this_nod, fs->hdlinks.hdl[hdlink].dst_nod, name);
 					continue;
 				} else {
 					save_nod = 1;
@@ -1697,8 +2403,12 @@
 					free(lnk);
 					break;
 				case S_IFREG:
-					fh = xfopen(dent->d_name, "rb");
-					nod = mkfile_fs(fs, this_nod, name, mode, st.st_size, fh, uid, gid, ctime, mtime);
+					fh = fopen(dent->d_name, "rb");
+					if (!fh) {
+						error_msg("Unable to open file %s", dent->d_name);
+						break;
+					}
+					nod = mkfile_fs(fs, this_nod, name, mode, fh, uid, gid, ctime, mtime);
 					fclose(fh);
 					break;
 				case S_IFDIR:
@@ -1706,199 +2416,128 @@
 					if(chdir(dent->d_name) < 0)
 						perror_msg_and_die(name);
 					add2fs_from_dir(fs, nod, squash_uids, squash_perms, fs_timestamp, stats);
-					chdir("..");
+					if (chdir("..") == -1)
+						perror_msg_and_die("..");
 					break;
 				default:
 					error_msg("ignoring entry %s", name);
 			}
 			if (save_nod) {
-				if (hdlinks.count == hdlink_cnt) {
-					if ((hdlinks.hdl = 
-						 realloc (hdlinks.hdl, (hdlink_cnt + HDLINK_CNT) *
+				if (fs->hdlinks.count == fs->hdlink_cnt) {
+					if ((fs->hdlinks.hdl =
+						 realloc (fs->hdlinks.hdl, (fs->hdlink_cnt + HDLINK_CNT) *
 								  sizeof (struct hdlink_s))) == NULL) {
 						error_msg_and_die("Not enough memory");
 					}
-					hdlink_cnt += HDLINK_CNT;
+					fs->hdlink_cnt += HDLINK_CNT;
 				}
-				hdlinks.hdl[hdlinks.count].src_inode = st.st_ino;
-				hdlinks.hdl[hdlinks.count].dst_nod = nod;
-				hdlinks.count++;
+				fs->hdlinks.hdl[fs->hdlinks.count].src_inode = st.st_ino;
+				fs->hdlinks.hdl[fs->hdlinks.count].dst_nod = nod;
+				fs->hdlinks.count++;
 			}
 		}
 	}
 	closedir(dh);
 }
 
-// endianness swap of x-indirect blocks
+// Copy size blocks from src to dst, putting holes in the output
+// file (if possible) if the input block is all zeros.
+// src is rewound and dst truncated to zero length first; size is a
+// count of BLOCKSIZE-byte blocks, and a short read is a fatal error.
 static void
-swap_goodblocks(filesystem *fs, inode *nod)
+copy_file(filesystem *fs, FILE *dst, FILE *src, size_t size)
 {
-	uint32 i,j;
-	int done=0;
-	uint32 *b,*b2;
+	uint8 *b;
 
-	uint32 nblk = nod->i_blocks / INOBLK;
-	if((nod->i_size && !nblk) || ((nod->i_mode & FM_IFBLK) == FM_IFBLK) || ((nod->i_mode & FM_IFCHR) == FM_IFCHR))
-		for(i = 0; i <= EXT2_TIND_BLOCK; i++)
-			nod->i_block[i] = swab32(nod->i_block[i]);
-	if(nblk <= EXT2_IND_BLOCK)
-		return;
-	swap_block(get_blk(fs, nod->i_block[EXT2_IND_BLOCK]));
-	if(nblk <= EXT2_DIND_BLOCK + BLOCKSIZE/4)
-		return;
-	/* Currently this will fail b'cos the number of blocks as stored
-	   in i_blocks also includes the indirection blocks (see
-	   walk_bw). But this function assumes that i_blocks only
-	   stores the count of data blocks ( Actually according to
-	   "Understanding the Linux Kernel" (Table 17-3 p502 1st Ed)
-	   i_blocks IS supposed to store the count of data blocks). so
-	   with a file of size 268K nblk would be 269.The above check
-	   will be false even though double indirection hasn't been
-	   started.This is benign as 0 means block 0 which has been
-	   zeroed out and therefore points back to itself from any offset
-	 */
-	// FIXME: I have fixed that, but I have the feeling the rest of
-	// ths function needs to be fixed for the same reasons - Xav
-	assert(nod->i_block[EXT2_DIND_BLOCK] != 0);
-	for(i = 0; i < BLOCKSIZE/4; i++)
-		if(nblk > EXT2_IND_BLOCK + BLOCKSIZE/4 + (BLOCKSIZE/4)*i )
-			swap_block(get_blk(fs, ((uint32*)get_blk(fs, nod->i_block[EXT2_DIND_BLOCK]))[i]));
-	swap_block(get_blk(fs, nod->i_block[EXT2_DIND_BLOCK]));
-	if(nblk <= EXT2_IND_BLOCK + BLOCKSIZE/4 + BLOCKSIZE/4 * BLOCKSIZE/4)
-		return;
-	/* Adding support for triple indirection */
-	b = (uint32*)get_blk(fs,nod->i_block[EXT2_TIND_BLOCK]);
-	for(i=0;i < BLOCKSIZE/4 && !done ; i++) {
-		b2 = (uint32*)get_blk(fs,b[i]); 
-		for(j=0; j<BLOCKSIZE/4;j++) {
-			if (nblk > ( EXT2_IND_BLOCK + BLOCKSIZE/4 + 
-				     (BLOCKSIZE/4)*(BLOCKSIZE/4) + 
-				     i*(BLOCKSIZE/4)*(BLOCKSIZE/4) + 
-				     j*(BLOCKSIZE/4)) ) 
-			  swap_block(get_blk(fs,b2[j]));
-			else {
-			  done = 1;
-			  break;
-			}
+	b = malloc(BLOCKSIZE);
+	if (!b)
+		error_msg_and_die("copy_file: out of memory");
+	if (fseek(src, 0, SEEK_SET))
+		perror_msg_and_die("fseek");
+	if (ftruncate(fileno(dst), 0))
+		perror_msg_and_die("copy_file: ftruncate");
+	while (size > 0) {
+		if (fread(b, BLOCKSIZE, 1, src) != 1)
+			perror_msg_and_die("copy failed on read");
+		if ((dst != stdout) && fs->holes && is_blk_empty(b)) {
+			/* Empty block, just skip it */
+			if (fseek(dst, BLOCKSIZE, SEEK_CUR))
+				perror_msg_and_die("fseek");
+		} else {
+			if (fwrite(b, BLOCKSIZE, 1, dst) != 1)
+				perror_msg_and_die("copy failed on write");
 		}
-		swap_block((uint8 *)b2);
+		size--;
 	}
-	swap_block((uint8 *)b);
-	return;
+	free(b);
 }
 
-static void
-swap_badblocks(filesystem *fs, inode *nod)
+// Allocate a zeroed filesystem structure, init its caches and hardlink table,
+// and open the backing file: tmpfile() for "-", else fname (copied from srcfile if given and different).
+static filesystem *
+alloc_fs(int swapit, char *fname, uint32 nbblocks, FILE *srcfile)
 {
-	uint32 i,j;
-	int done=0;
-	uint32 *b,*b2;
+	filesystem *fs;
+	struct stat srcstat, dststat;
 
-	uint32 nblk = nod->i_blocks / INOBLK;
-	if((nod->i_size && !nblk) || ((nod->i_mode & FM_IFBLK) == FM_IFBLK) || ((nod->i_mode & FM_IFCHR) == FM_IFCHR))
-		for(i = 0; i <= EXT2_TIND_BLOCK; i++)
-			nod->i_block[i] = swab32(nod->i_block[i]);
-	if(nblk <= EXT2_IND_BLOCK)
-		return;
-	swap_block(get_blk(fs, nod->i_block[EXT2_IND_BLOCK]));
-	if(nblk <= EXT2_DIND_BLOCK + BLOCKSIZE/4)
-		return;
-	/* See comment in swap_goodblocks */
-	assert(nod->i_block[EXT2_DIND_BLOCK] != 0);
-	swap_block(get_blk(fs, nod->i_block[EXT2_DIND_BLOCK]));
-	for(i = 0; i < BLOCKSIZE/4; i++)
-		if(nblk > EXT2_IND_BLOCK + BLOCKSIZE/4 + (BLOCKSIZE/4)*i )
-			swap_block(get_blk(fs, ((uint32*)get_blk(fs, nod->i_block[EXT2_DIND_BLOCK]))[i]));
-	if(nblk <= EXT2_IND_BLOCK + BLOCKSIZE/4 + BLOCKSIZE/4 * BLOCKSIZE/4)
-		return;
-	/* Adding support for triple indirection */
-	b = (uint32*)get_blk(fs,nod->i_block[EXT2_TIND_BLOCK]);
-	swap_block((uint8 *)b);
-	for(i=0;i < BLOCKSIZE/4 && !done ; i++) {
-		b2 = (uint32*)get_blk(fs,b[i]); 
-		swap_block((uint8 *)b2);
-		for(j=0; j<BLOCKSIZE/4;j++) {
-			if (nblk > ( EXT2_IND_BLOCK + BLOCKSIZE/4 + 
-				     (BLOCKSIZE/4)*(BLOCKSIZE/4) + 
-				     i*(BLOCKSIZE/4)*(BLOCKSIZE/4) + 
-				     j*(BLOCKSIZE/4)) ) 
-			  swap_block(get_blk(fs,b2[j]));
-			else {
-			  done = 1;
-			  break;
-			}
-		}
-	}
-	return;
-}
+	fs = malloc(sizeof(*fs));
+	if (!fs)
+		error_msg_and_die("not enough memory for filesystem");
+	memset(fs, 0, sizeof(*fs));
+	fs->swapit = swapit;
+	cache_init(&fs->blks, MAX_FREE_CACHE_BLOCKS, blk_elem_val, blk_freed);
+	cache_init(&fs->gds, MAX_FREE_CACHE_GDS, gd_elem_val, gd_freed);
+	cache_init(&fs->blkmaps, MAX_FREE_CACHE_BLOCKMAPS,
+		   blkmap_elem_val, blkmap_freed);
+	cache_init(&fs->inodes, MAX_FREE_CACHE_INODES,
+		   inode_elem_val, inode_freed);
+	fs->hdlink_cnt = HDLINK_CNT;
+	fs->hdlinks.hdl = calloc(sizeof(struct hdlink_s), fs->hdlink_cnt);
+	if (!fs->hdlinks.hdl)
+		error_msg_and_die("Not enough memory");
+	fs->hdlinks.count = 0 ;
 
-// endianness swap of the whole filesystem
-static void
-swap_goodfs(filesystem *fs)
-{
-	uint32 i;
-	for(i = 1; i < fs->sb.s_inodes_count; i++)
-	{
-		inode *nod = get_nod(fs, i);
-		if(nod->i_mode & FM_IFDIR)
-		{
-			blockwalker bw;
-			uint32 bk;
-			init_bw(&bw);
-			while((bk = walk_bw(fs, i, &bw, 0, 0)) != WALK_END)
-			{
-				directory *d;
-				uint8 *b;
-				b = get_blk(fs, bk);
-				for(d = (directory*)b; (int8*)d + sizeof(*d) < (int8*)b + BLOCKSIZE; d = (directory*)((int8*)d + swab16(d->d_rec_len)))
-					swap_dir(d);
-			}
-		}
-		swap_goodblocks(fs, nod);
-		swap_nod(nod);
-	}
-	for(i=0;i<GRP_NBGROUPS(fs);i++)
-		swap_gd(&(fs->gd[i]));
-	swap_sb(&fs->sb);
+	if (strcmp(fname, "-") == 0)
+		fs->f = tmpfile();
+	else if (srcfile) {
+		if (fstat(fileno(srcfile), &srcstat))
+			perror_msg_and_die("fstat srcfile");
+		if (stat(fname, &dststat) == 0
+		    && srcstat.st_ino == dststat.st_ino
+		    && srcstat.st_dev == dststat.st_dev)
+		  {
+			// source and destination are the same file, don't
+			// truncate or copy, just use the file.
+			fs->f = fopen(fname, "r+b");
+		} else {
+			fs->f = fopen(fname, "w+b");
+			if (fs->f)
+				copy_file(fs, fs->f, srcfile, nbblocks);
+		}
+	} else
+		fs->f = fopen(fname, "w+b");
+	if (!fs->f)
+		perror_msg_and_die("opening %s", fname);
+	return fs;
 }
 
+/* Resize the output file to exactly s_blocks_count * BLOCKSIZE bytes */
 static void
-swap_badfs(filesystem *fs)
+set_file_size(filesystem *fs)
 {
-	uint32 i;
-	swap_sb(&fs->sb);
-	for(i=0;i<GRP_NBGROUPS(fs);i++)
-		swap_gd(&(fs->gd[i]));
-	for(i = 1; i < fs->sb.s_inodes_count; i++)
-	{
-		inode *nod = get_nod(fs, i);
-		swap_nod(nod);
-		swap_badblocks(fs, nod);
-		if(nod->i_mode & FM_IFDIR)
-		{
-			blockwalker bw;
-			uint32 bk;
-			init_bw(&bw);
-			while((bk = walk_bw(fs, i, &bw, 0, 0)) != WALK_END)
-			{
-				directory *d;
-				uint8 *b;
-				b = get_blk(fs, bk);
-				for(d = (directory*)b; (int8*)d + sizeof(*d) < (int8*)b + BLOCKSIZE; d = (directory*)((int8*)d + d->d_rec_len))
-					swap_dir(d);
-			}
-		}
-	}
+	if (ftruncate(fileno(fs->f),
+		      ((off_t) fs->sb->s_blocks_count) * BLOCKSIZE))
+		perror_msg_and_die("set_file_size: ftruncate");
 }
 
 // initialize an empty filesystem
 static filesystem *
-init_fs(int nbblocks, int nbinodes, int nbresrvd, int holes, uint32 fs_timestamp)
+init_fs(int nbblocks, int nbinodes, int nbresrvd, int holes,
+	uint32 fs_timestamp, uint32 creator_os, int swapit, char *fname)
 {
 	uint32 i;
 	filesystem *fs;
-	directory *d;
-	uint8 * b;
+	dirwalker dw;
 	uint32 nod, first_block;
 	uint32 nbgroups,nbinodes_per_group,overhead_per_group,free_blocks,
 		free_blocks_per_group,nbblocks_per_group,min_nbgroups;
@@ -1906,6 +2545,11 @@
 	uint32 j;
 	uint8 *bbm,*ibm;
 	inode *itab0;
+	blk_info *bi;
+	nod_info *ni;
+	groupdescriptor *gd;
+	gd_info *gi;
+	inode_pos ipos;
 	
 	if(nbresrvd < 0)
 		error_msg_and_die("reserved blocks value is invalid. Note: options have changed, see --help or the man page.");
@@ -1919,10 +2563,14 @@
 	 */
 	min_nbgroups = (nbinodes + INODES_PER_GROUP - 1) / INODES_PER_GROUP;
 
+	/* On filesystems with 1k block size, the bootloader area uses a full
+	 * block. For 2048 and up, the superblock can be fitted into block 0.
+	 */
+	first_block = (BLOCKSIZE == 1024);
+
 	/* nbblocks is the total number of blocks in the filesystem.
 	 * a block group can have no more than 8192 blocks.
 	 */
-	first_block = (BLOCKSIZE == 1024);
 	nbgroups = (nbblocks - first_block + BLOCKS_PER_GROUP - 1) / BLOCKS_PER_GROUP;
 	if(nbgroups < min_nbgroups) nbgroups = min_nbgroups;
 	nbblocks_per_group = rndup((nbblocks - first_block + nbgroups - 1)/nbgroups, 8);
@@ -1934,51 +2582,59 @@
 	gdsz = rndup(nbgroups*sizeof(groupdescriptor),BLOCKSIZE)/BLOCKSIZE;
 	itblsz = nbinodes_per_group * sizeof(inode)/BLOCKSIZE;
 	overhead_per_group = 3 /*sb,bbm,ibm*/ + gdsz + itblsz;
-	if((uint32)nbblocks - 1 < overhead_per_group * nbgroups)
-		error_msg_and_die("too much overhead, try fewer inodes or more blocks. Note: options have changed, see --help or the man page.");
-	free_blocks = nbblocks - overhead_per_group*nbgroups - 1 /*boot block*/;
+	free_blocks = nbblocks - overhead_per_group*nbgroups - first_block;
 	free_blocks_per_group = nbblocks_per_group - overhead_per_group;
+	if((uint32)nbblocks < overhead_per_group*nbgroups + first_block) // free_blocks is uint32, so testing "free_blocks < 0" could never fire
+		error_msg_and_die("too much overhead, try fewer inodes or more blocks. Note: options have changed, see --help or the man page.");
 
-	if(!(fs = (filesystem*)calloc(nbblocks, BLOCKSIZE)))
-		error_msg_and_die("not enough memory for filesystem");
+	fs = alloc_fs(swapit, fname, nbblocks, NULL);
+	fs->sb = calloc(1, SUPERBLOCK_SIZE);
+	if (!fs->sb)
+		error_msg_and_die("error allocating header memory");
 
 	// create the superblock for an empty filesystem
-	fs->sb.s_inodes_count = nbinodes_per_group * nbgroups;
-	fs->sb.s_blocks_count = nbblocks;
-	fs->sb.s_r_blocks_count = nbresrvd;
-	fs->sb.s_free_blocks_count = free_blocks;
-	fs->sb.s_free_inodes_count = fs->sb.s_inodes_count - EXT2_FIRST_INO + 1;
-	fs->sb.s_first_data_block = first_block;
-	fs->sb.s_log_block_size = BLOCKSIZE >> 11;
-	fs->sb.s_log_frag_size = BLOCKSIZE >> 11;
-	fs->sb.s_blocks_per_group = nbblocks_per_group;
-	fs->sb.s_frags_per_group = nbblocks_per_group;
-	fs->sb.s_inodes_per_group = nbinodes_per_group;
-	fs->sb.s_wtime = fs_timestamp;
-	fs->sb.s_magic = EXT2_MAGIC_NUMBER;
-	fs->sb.s_lastcheck = fs_timestamp;
+	fs->sb->s_inodes_count = nbinodes_per_group * nbgroups;
+	fs->sb->s_blocks_count = nbblocks;
+	fs->sb->s_r_blocks_count = nbresrvd;
+	fs->sb->s_free_blocks_count = free_blocks;
+	fs->sb->s_free_inodes_count = fs->sb->s_inodes_count - EXT2_FIRST_INO + 1;
+	fs->sb->s_first_data_block = first_block;
+	fs->sb->s_log_block_size = BLOCKSIZE >> 11;
+	fs->sb->s_log_frag_size = BLOCKSIZE >> 11;
+	fs->sb->s_blocks_per_group = nbblocks_per_group;
+	fs->sb->s_frags_per_group = nbblocks_per_group;
+	fs->sb->s_inodes_per_group = nbinodes_per_group;
+	fs->sb->s_wtime = fs_timestamp;
+	fs->sb->s_magic = EXT2_MAGIC_NUMBER;
+	fs->sb->s_lastcheck = fs_timestamp;
+	fs->sb->s_creator_os = creator_os;
+
+	set_file_size(fs);
 
 	// set up groupdescriptors
-	for(i=0, bbmpos=gdsz+2, ibmpos=bbmpos+1, itblpos=ibmpos+1;
+	for(i=0, bbmpos=first_block+1+gdsz, ibmpos=bbmpos+1, itblpos=ibmpos+1;
 		i<nbgroups;
 		i++, bbmpos+=nbblocks_per_group, ibmpos+=nbblocks_per_group, itblpos+=nbblocks_per_group)
 	{
+		gd = get_gd(fs, i, &gi);
+
 		if(free_blocks > free_blocks_per_group) {
-			fs->gd[i].bg_free_blocks_count = free_blocks_per_group;
+			gd->bg_free_blocks_count = free_blocks_per_group;
 			free_blocks -= free_blocks_per_group;
 		} else {
-			fs->gd[i].bg_free_blocks_count = free_blocks;
+			gd->bg_free_blocks_count = free_blocks;
 			free_blocks = 0; // this is the last block group
 		}
 		if(i)
-			fs->gd[i].bg_free_inodes_count = nbinodes_per_group;
+			gd->bg_free_inodes_count = nbinodes_per_group;
 		else
-			fs->gd[i].bg_free_inodes_count = nbinodes_per_group -
+			gd->bg_free_inodes_count = nbinodes_per_group -
 							EXT2_FIRST_INO + 2;
-		fs->gd[i].bg_used_dirs_count = 0;
-		fs->gd[i].bg_block_bitmap = bbmpos;
-		fs->gd[i].bg_inode_bitmap = ibmpos;
-		fs->gd[i].bg_inode_table = itblpos;
+		gd->bg_used_dirs_count = 0;
+		gd->bg_block_bitmap = bbmpos;
+		gd->bg_inode_bitmap = ibmpos;
+		gd->bg_inode_table = itblpos;
+		put_gd(gi);
 	}
 
 	/* Mark non-filesystem blocks and inodes as allocated */
@@ -1984,110 +2640,143 @@
 	/* Mark non-filesystem blocks and inodes as allocated */
 	/* Mark system blocks and inodes as allocated         */
 	for(i = 0; i<nbgroups;i++) {
-
 		/* Block bitmap */
-		bbm = get_blk(fs,fs->gd[i].bg_block_bitmap);	
+		gd = get_gd(fs, i, &gi);
+		bbm = GRP_GET_GROUP_BBM(fs, gd, &bi);
 		//non-filesystem blocks
-		for(j = fs->gd[i].bg_free_blocks_count
+		for(j = gd->bg_free_blocks_count
 		        + overhead_per_group + 1; j <= BLOCKSIZE * 8; j++)
 			allocate(bbm, j); 
 		//system blocks
 		for(j = 1; j <= overhead_per_group; j++)
 			allocate(bbm, j); 
-		
+		GRP_PUT_GROUP_BBM(bi);
+
 		/* Inode bitmap */
-		ibm = get_blk(fs,fs->gd[i].bg_inode_bitmap);	
+		ibm = GRP_GET_GROUP_IBM(fs, gd, &bi);
 		//non-filesystem inodes
-		for(j = fs->sb.s_inodes_per_group+1; j <= BLOCKSIZE * 8; j++)
+		for(j = fs->sb->s_inodes_per_group+1; j <= BLOCKSIZE * 8; j++)
 			allocate(ibm, j);
 
 		//system inodes
 		if(i == 0)
 			for(j = 1; j < EXT2_FIRST_INO; j++)
 				allocate(ibm, j);
+		GRP_PUT_GROUP_IBM(bi);
+		put_gd(gi);
 	}
 
 	// make root inode and directory
 	/* We have groups now. Add the root filesystem in group 0 */
 	/* Also increment the directory count for group 0 */
-	fs->gd[0].bg_free_inodes_count--;
-	fs->gd[0].bg_used_dirs_count = 1;
-	itab0 = (inode *)get_blk(fs,fs->gd[0].bg_inode_table);
-	itab0[EXT2_ROOT_INO-1].i_mode = FM_IFDIR | FM_IRWXU | FM_IRGRP | FM_IROTH | FM_IXGRP | FM_IXOTH; 
-	itab0[EXT2_ROOT_INO-1].i_ctime = fs_timestamp;
-	itab0[EXT2_ROOT_INO-1].i_mtime = fs_timestamp;
-	itab0[EXT2_ROOT_INO-1].i_atime = fs_timestamp;
-	itab0[EXT2_ROOT_INO-1].i_size = BLOCKSIZE;
-	itab0[EXT2_ROOT_INO-1].i_links_count = 2;
-
-	if(!(b = get_workblk()))
-		error_msg_and_die("get_workblk() failed.");
-	d = (directory*)b;
-	d->d_inode = EXT2_ROOT_INO;
-	d->d_rec_len = sizeof(directory)+4;
-	d->d_name_len = 1;
-	strcpy(d->d_name, ".");
-	d = (directory*)(b + d->d_rec_len);
-	d->d_inode = EXT2_ROOT_INO;
-	d->d_rec_len = BLOCKSIZE - (sizeof(directory)+4);
-	d->d_name_len = 2;
-	strcpy(d->d_name, "..");
-	extend_blk(fs, EXT2_ROOT_INO, b, 1);
+	gd = get_gd(fs, 0, &gi);
+	gd->bg_free_inodes_count--;
+	gd->bg_used_dirs_count = 1;
+	put_gd(gi);
+	itab0 = get_nod(fs, EXT2_ROOT_INO, &ni);
+	itab0->i_mode = FM_IFDIR | FM_IRWXU | FM_IRGRP | FM_IROTH | FM_IXGRP | FM_IXOTH;
+	itab0->i_ctime = fs_timestamp;
+	itab0->i_mtime = fs_timestamp;
+	itab0->i_atime = fs_timestamp;
+	itab0->i_size = BLOCKSIZE;
+	itab0->i_links_count = 2;
+	put_nod(ni);
+
+	new_dir(fs, EXT2_ROOT_INO, ".", 1, &dw);
+	shrink_dir(&dw, EXT2_ROOT_INO, "..", 2);
+	next_dir(&dw); // Force the data into the buffer
+	inode_pos_init(fs, &ipos, EXT2_ROOT_INO, INODE_POS_EXTEND, NULL);
+	extend_inode_blk(fs, &ipos, dir_data(&dw), 1);
+	inode_pos_finish(fs, &ipos);
+	put_dir(&dw);
 
-	// make lost+found directory and reserve blocks
-	if(fs->sb.s_r_blocks_count)
+	// make lost+found directory
+	if(fs->sb->s_r_blocks_count)
 	{
-		nod = mkdir_fs(fs, EXT2_ROOT_INO, "lost+found", FM_IRWXU, 0, 0, fs_timestamp, fs_timestamp);
+		inode *node;
+		uint8 *b;
+
+		nod = mkdir_fs(fs, EXT2_ROOT_INO, "lost+found", FM_IRWXU,
+			       0, 0, fs_timestamp, fs_timestamp);
+		b = get_workblk();
 		memset(b, 0, BLOCKSIZE);
 		((directory*)b)->d_rec_len = BLOCKSIZE;
-		/* We run into problems with e2fsck if directory lost+found grows
-		 * bigger than this. Need to find out why this happens - sundar
-		 */
-		if (fs->sb.s_r_blocks_count > fs->sb.s_blocks_count * MAX_RESERVED_BLOCKS ) 
-			fs->sb.s_r_blocks_count = fs->sb.s_blocks_count * MAX_RESERVED_BLOCKS;
-		for(i = 1; i < fs->sb.s_r_blocks_count; i++)
-			extend_blk(fs, nod, b, 1);
-		get_nod(fs, nod)->i_size = fs->sb.s_r_blocks_count * BLOCKSIZE;
+		inode_pos_init(fs, &ipos, nod, INODE_POS_EXTEND, NULL);
+		// It is always 16 blocks to start out with
+		for(i = 1; i < 16; i++)
+			extend_inode_blk(fs, &ipos, b, 1);
+		inode_pos_finish(fs, &ipos);
+		free_workblk(b);
+		node = get_nod(fs, nod, &ni);
+		node->i_size = 16 * BLOCKSIZE;
+		put_nod(ni);
 	}
-	free_workblk(b);
 
 	// administrative info
-	fs->sb.s_state = 1;
-	fs->sb.s_max_mnt_count = 20;
+	fs->sb->s_state = 1;
+	fs->sb->s_max_mnt_count = 20;
 
 	// options for me
-	if(holes)
-		fs->sb.s_reserved[200] |= OP_HOLES;
+	fs->holes = holes;
 	
 	return fs;
 }
 
 // loads a filesystem from disk
 static filesystem *
-load_fs(FILE * fh, int swapit)
+load_fs(FILE *fh, int swapit, char *fname) // fname and fh are both handed to alloc_fs()
 {
-	size_t fssize;
+	off_t fssize;
 	filesystem *fs;
-	if((fseek(fh, 0, SEEK_END) < 0) || ((ssize_t)(fssize = ftell(fh)) == -1))
+	// The image size is taken from the end-of-file offset of fh.
+	if((fseek(fh, 0, SEEK_END) < 0) || ((fssize = ftello(fh)) == -1))
 		perror_msg_and_die("input filesystem image");
 	rewind(fh);
-	fssize = (fssize + BLOCKSIZE - 1) / BLOCKSIZE;
+	if ((fssize % BLOCKSIZE) != 0)
+		error_msg_and_die("Input file not a multiple of block size");
+	fssize /= BLOCKSIZE; // from here on fssize is a block count, not bytes
 	if(fssize < 16) // totally arbitrary
 		error_msg_and_die("too small filesystem");
-	if(!(fs = (filesystem*)calloc(fssize, BLOCKSIZE)))
-		error_msg_and_die("not enough memory for filesystem");
-	if(fread(fs, BLOCKSIZE, fssize, fh) != fssize)
-		perror_msg_and_die("input filesystem image");
+	fs = alloc_fs(swapit, fname, fssize, fh);
+
+	/* Read the superblock from fs->f, byte-swap it when requested,
+	 * and reject any filesystem this tool cannot handle. */
+	fs->sb = malloc(SUPERBLOCK_SIZE);
+	if (!fs->sb)
+		error_msg_and_die("error allocating header memory");
+	if (fseek(fs->f, SUPERBLOCK_OFFSET, SEEK_SET))
+		perror_msg_and_die("fseek");
+	if (fread(fs->sb, SUPERBLOCK_SIZE, 1, fs->f) != 1)
+		perror_msg_and_die("fread filesystem image superblock");
 	if(swapit)
-		swap_badfs(fs);
-	if(fs->sb.s_rev_level || (fs->sb.s_magic != EXT2_MAGIC_NUMBER))
+		swap_sb(fs->sb);
+
+	if((fs->sb->s_rev_level > 1) || (fs->sb->s_magic != EXT2_MAGIC_NUMBER)) // only rev 0 and rev 1 pass
 		error_msg_and_die("not a suitable ext2 filesystem");
+	if (fs->sb->s_rev_level > 0) { // rev 1 is accepted only with the "good old" layout and no extra features
+		if (fs->sb->s_first_ino != EXT2_GOOD_OLD_FIRST_INO)
+			error_msg_and_die("First inode incompatible");
+		if (fs->sb->s_inode_size != EXT2_GOOD_OLD_INODE_SIZE)
+			error_msg_and_die("inode size incompatible");
+		if (fs->sb->s_feature_compat)
+			error_msg_and_die("Unsupported compat features");
+		if (fs->sb->s_feature_incompat)
+			error_msg_and_die("Unsupported incompat features");
+		if (fs->sb->s_feature_ro_compat // LARGE_FILE is the only ro-compat bit tolerated
+		    & ~EXT2_FEATURE_RO_COMPAT_LARGE_FILE)
+			error_msg_and_die("Unsupported ro compat features");
+	}
+
+	set_file_size(fs);
 	return fs;
 }
 
 static void
 free_fs(filesystem *fs)
 {
+	free(fs->hdlinks.hdl);
+	fclose(fs->f); // NOTE(review): return value ignored, so a failed final flush goes unreported
+	free(fs->sb); // the superblock buffer is a separate allocation (see malloc in load_fs)
 	free(fs);
 }
 
@@ -2123,16 +2812,23 @@
 {
 	blockwalker bw;
 	uint32 bk;
-	int32 fsize = get_nod(fs, nod)->i_size;
+	nod_info *ni;
+	inode *node = get_nod(fs, nod, &ni);
+	int32 fsize = node->i_size;
+	blk_info *bi;
+
 	init_bw(&bw);
 	while((bk = walk_bw(fs, nod, &bw, 0, 0)) != WALK_END)
 	{
 		if(fsize <= 0)
 			error_msg_and_die("wrong size while saving inode %d", nod);
-		if(fwrite(get_blk(fs, bk), (fsize > BLOCKSIZE) ? BLOCKSIZE : fsize, 1, f) != 1)
+		if(fwrite(get_blk(fs, bk, &bi),
+			  (fsize > BLOCKSIZE) ? BLOCKSIZE : fsize, 1, f) != 1)
 			error_msg_and_die("error while saving inode %d", nod);
+		put_blk(bi);
 		fsize -= BLOCKSIZE;
 	}
+	put_nod(ni);
 }
 
 
@@ -2141,8 +2837,11 @@
 print_dev(filesystem *fs, uint32 nod)
 {
 	int minor, major;
-	minor = ((uint8*)get_nod(fs, nod)->i_block)[0];
-	major = ((uint8*)get_nod(fs, nod)->i_block)[1];
+	nod_info *ni;
+	inode *node = get_nod(fs, nod, &ni); // fetch the inode once instead of once per field
+	minor = ((uint8*)node->i_block)[0]; // byte 0 of i_block is printed as the minor number
+	major = ((uint8*)node->i_block)[1]; // byte 1 of i_block is printed as the major number
+	put_nod(ni); // pairs with get_nod() above; node is not used after this point
 	printf("major: %d, minor: %d\n", major, minor);
 }
 
@@ -2157,17 +2856,15 @@
 	while((bk = walk_bw(fs, nod, &bw, 0, 0)) != WALK_END)
 	{
 		directory *d;
-		uint8 *b;
-		b = get_blk(fs, bk);
-		for(d = (directory*)b; (int8*)d + sizeof(*d) < (int8*)b + BLOCKSIZE; d = (directory*)((int8*)d + d->d_rec_len))
+		dirwalker dw;
+		for (d = get_dir(fs, bk, &dw); d; d = next_dir(&dw))
 			if(d->d_inode)
 			{
-				int i;
 				printf("entry '");
-				for(i = 0; i < d->d_name_len; i++)
-					putchar(d->d_name[i]);
+				fwrite(dir_name(&dw), 1, d->d_name_len, stdout);
 				printf("' (inode %d): rec_len: %d (name_len: %d)\n", d->d_inode, d->d_rec_len, d->d_name_len);
 			}
+		put_dir(&dw);
 	}
 }
 
@@ -2175,14 +2872,18 @@
 static void
 print_link(filesystem *fs, uint32 nod)
 {
-	if(!get_nod(fs, nod)->i_blocks)
-		printf("links to '%s'\n", (char*)get_nod(fs, nod)->i_block);
+	nod_info *ni;
+	inode *node = get_nod(fs, nod, &ni);
+	// With i_blocks == 0 the target is printed straight from i_block; otherwise from the data blocks.
+	if(!node->i_blocks)
+		printf("links to '%s'\n", (char*)node->i_block);
 	else
 	{
 		printf("links to '");
 		write_blocks(fs, nod, stdout);
 		printf("'\n");
 	}
+	put_nod(ni); // pairs with get_nod() above
 }
 
 // make a ls-like printout of permissions
@@ -2251,8 +2952,13 @@
 {
 	char *s;
 	char perms[11];
-	if(!get_nod(fs, nod)->i_mode)
-		return;
+	nod_info *ni;
+	inode *node = get_nod(fs, nod, &ni);
+	blk_info *bi;
+	gd_info *gi;
+
+	if(!node->i_mode)
+		goto out;
 	switch(nod)
 	{
 		case EXT2_BAD_INO:
@@ -2274,15 +2980,18 @@
 		default:
 			s = (nod >= EXT2_FIRST_INO) ? "normal" : "unknown reserved"; 
 	}
-	printf("inode %d (%s, %d links): ", nod, s, get_nod(fs, nod)->i_links_count);
-	if(!allocated(GRP_GET_INODE_BITMAP(fs,nod), GRP_IBM_OFFSET(fs,nod)))
+	printf("inode %d (%s, %d links): ", nod, s, node->i_links_count);
+	if(!allocated(GRP_GET_INODE_BITMAP(fs,nod,&bi,&gi), GRP_IBM_OFFSET(fs,nod)))
 	{
+		GRP_PUT_INODE_BITMAP(bi,gi);
 		printf("unallocated\n");
-		return;
+		goto out;
 	}
-	make_perms(get_nod(fs, nod)->i_mode, perms);
-	printf("%s,  size: %d byte%s (%d block%s)\n", perms, plural(get_nod(fs, nod)->i_size), plural(get_nod(fs, nod)->i_blocks / INOBLK));
-	switch(get_nod(fs, nod)->i_mode & FM_IFMT)
+	GRP_PUT_INODE_BITMAP(bi,gi);
+	make_perms(node->i_mode, perms);
+	printf("%s,  size: %d byte%s (%d block%s)\n", perms,
+	       plural(node->i_size), plural(node->i_blocks / INOBLK));
+	switch(node->i_mode & FM_IFMT)
 	{
 		case FM_IFSOCK:
 			list_blocks(fs, nod);
@@ -2310,6 +3019,8 @@
 			list_blocks(fs, nod);
 	}
 	printf("Done with inode %d\n",nod);
+out:
+	put_nod(ni);
 }
 
 // describes various fields in a filesystem
@@ -2317,49 +3028,65 @@
 print_fs(filesystem *fs)
 {
-	uint32 i;
+	uint32 i, j; // i: group index; j: 1-based inode index inside group i
+	blk_info *bi;
+	groupdescriptor *gd;
+	gd_info *gi;
 	uint8 *ibm;
 
 	printf("%d blocks (%d free, %d reserved), first data block: %d\n",
-	       fs->sb.s_blocks_count, fs->sb.s_free_blocks_count,
-	       fs->sb.s_r_blocks_count, fs->sb.s_first_data_block);
-	printf("%d inodes (%d free)\n", fs->sb.s_inodes_count,
-	       fs->sb.s_free_inodes_count);
+	       fs->sb->s_blocks_count, fs->sb->s_free_blocks_count,
+	       fs->sb->s_r_blocks_count, fs->sb->s_first_data_block);
+	printf("%d inodes (%d free)\n", fs->sb->s_inodes_count,
+	       fs->sb->s_free_inodes_count);
 	printf("block size = %d, frag size = %d\n",
-	       fs->sb.s_log_block_size ? (fs->sb.s_log_block_size << 11) : 1024,
-	       fs->sb.s_log_frag_size ? (fs->sb.s_log_frag_size << 11) : 1024);
+	       fs->sb->s_log_block_size ? (fs->sb->s_log_block_size << 11) : 1024,
+	       fs->sb->s_log_frag_size ? (fs->sb->s_log_frag_size << 11) : 1024);
 	printf("number of groups: %d\n",GRP_NBGROUPS(fs));
 	printf("%d blocks per group,%d frags per group,%d inodes per group\n",
-	     fs->sb.s_blocks_per_group, fs->sb.s_frags_per_group,
-	     fs->sb.s_inodes_per_group);
+	     fs->sb->s_blocks_per_group, fs->sb->s_frags_per_group,
+	     fs->sb->s_inodes_per_group);
 	printf("Size of inode table: %d blocks\n",
-		(int)(fs->sb.s_inodes_per_group * sizeof(inode) / BLOCKSIZE));
+		(int)(fs->sb->s_inodes_per_group * sizeof(inode) / BLOCKSIZE));
 	for (i = 0; i < GRP_NBGROUPS(fs); i++) {
 		printf("Group No: %d\n", i+1);
+		gd = get_gd(fs, i, &gi);
 		printf("block bitmap: block %d,inode bitmap: block %d, inode table: block %d\n",
-		     fs->gd[i].bg_block_bitmap, fs->gd[i].bg_inode_bitmap,
-		     fs->gd[i].bg_inode_table);
+		     gd->bg_block_bitmap,
+		     gd->bg_inode_bitmap,
+		     gd->bg_inode_table);
 		printf("block bitmap allocation:\n");
-		print_bm(GRP_GET_GROUP_BBM(fs, i),fs->sb.s_blocks_per_group);
+		print_bm(GRP_GET_GROUP_BBM(fs, gd, &bi),fs->sb->s_blocks_per_group);
+		GRP_PUT_GROUP_BBM(bi);
 		printf("inode bitmap allocation:\n");
-		ibm = GRP_GET_GROUP_IBM(fs, i);
-		print_bm(ibm, fs->sb.s_inodes_per_group);
-		for (i = 1; i <= fs->sb.s_inodes_per_group; i++)
-			if (allocated(ibm, i))
-				print_inode(fs, i);
+		ibm = GRP_GET_GROUP_IBM(fs, gd, &bi);
+		print_bm(ibm, fs->sb->s_inodes_per_group);
+		for (j = 1; j <= fs->sb->s_inodes_per_group; j++) // own counter: reusing i ended the group loop early
+			if (allocated(ibm, j))
+				print_inode(fs, i * fs->sb->s_inodes_per_group + j); // group-local j -> global inode number
+		GRP_PUT_GROUP_IBM(bi);
+		put_gd(gi);
 	}
 }
 
 static void
-dump_fs(filesystem *fs, FILE * fh, int swapit)
+finish_fs(filesystem *fs) // flushes the four caches, then writes the superblock to fs->f
 {
-	uint32 nbblocks = fs->sb.s_blocks_count;
-	fs->sb.s_reserved[200] = 0;
-	if(swapit)
-		swap_goodfs(fs);
-	if(fwrite(fs, BLOCKSIZE, nbblocks, fh) < nbblocks)
-		perror_msg_and_die("output filesystem image");
-	if(swapit)
-		swap_badfs(fs);
+	if (cache_flush(&fs->inodes)) // cache_flush() is nonzero when its entry counters do not end at 0
+		error_msg_and_die("entry mismatch on inode cache flush");
+	if (cache_flush(&fs->blkmaps))
+		error_msg_and_die("entry mismatch on blockmap cache flush");
+	if (cache_flush(&fs->gds))
+		error_msg_and_die("entry mismatch on gd cache flush");
+	if (cache_flush(&fs->blks))
+		error_msg_and_die("entry mismatch on block cache flush");
+	if(fs->swapit)
+		swap_sb(fs->sb); // swapped before it is written out
+	if (fseek(fs->f, SUPERBLOCK_OFFSET, SEEK_SET))
+		perror_msg_and_die("fseek");
+	if(fwrite(fs->sb, SUPERBLOCK_SIZE, 1, fs->f) != 1)
+		perror_msg_and_die("output filesystem superblock");
+	if(fs->swapit)
+		swap_sb(fs->sb); // second swap; presumably restores the in-memory byte order - confirm swap_sb is its own inverse
 }
 
 static void
@@ -2419,10 +3146,12 @@
 	"  -x, --starting-image <image>\n"
 	"  -d, --root <directory>\n"
 	"  -D, --devtable <file>\n"
+	"  -B, --block-size <bytes>\n"
 	"  -b, --size-in-blocks <blocks>\n"
 	"  -i, --bytes-per-inode <bytes per inode>\n"
 	"  -N, --number-of-inodes <number of inodes>\n"
 	"  -m, --reserved-percentage <percentage of blocks to reserve>\n"
+	"  -o, --creator-os <os>      'linux' (default), 'hurd', 'freebsd' or number.\n"
 	"  -g, --block-map <path>     Generate a block map file for this path.\n"
 	"  -e, --fill-value <value>   Fill unallocated blocks with value.\n"
 	"  -z, --allow-holes          Allow files with holes.\n"
@@ -2444,15 +3173,34 @@
 extern char* optarg;
 extern int optind, opterr, optopt;
 
+// Parse the value for -o <os>: a numeric id when it starts with a digit, else
+// a case-insensitive OS name.  Returns -1 for a name that is not recognized.
+int
+lookup_creator_os(const char *name)
+{
+        if (isdigit((unsigned char)*name)) // ctype functions need an unsigned char value
+                return atoi(name);
+        if (strcasecmp(name, "linux") == 0)
+                return EXT2_OS_LINUX;
+        if (strcasecmp(name, "GNU") == 0 || strcasecmp(name, "hurd") == 0)
+                return EXT2_OS_HURD;
+        if (strcasecmp(name, "freebsd") == 0)
+                return EXT2_OS_FREEBSD;
+        if (strcasecmp(name, "lites") == 0)
+                return EXT2_OS_LITES;
+        return -1; // lets main()'s "creator_os < 0" check report the unknown name
+}
+
 int
 main(int argc, char **argv)
 {
-	int nbblocks = -1;
+	long long nbblocks = -1;
 	int nbinodes = -1;
 	int nbresrvd = -1;
 	float bytes_per_inode = -1;
 	float reserved_frac = -1;
 	int fs_timestamp = -1;
+	int creator_os = CREATOR_OS;
 	char * fsout = "-";
 	char * fsin = 0;
 	char * dopt[MAX_DOPT];
@@ -2466,6 +3214,7 @@
 	int squash_perms = 0;
 	uint16 endian = 1;
 	int bigendian = !*(char*)&endian;
+	char *volumelabel = NULL;
 	filesystem *fs;
 	int i;
 	int c;
@@ -2476,13 +3225,16 @@
 	  { "starting-image",	required_argument,	NULL, 'x' },
 	  { "root",		required_argument,	NULL, 'd' },
 	  { "devtable",		required_argument,	NULL, 'D' },
+	  { "block-size",	required_argument,	NULL, 'B' },
 	  { "size-in-blocks",	required_argument,	NULL, 'b' },
 	  { "bytes-per-inode",	required_argument,	NULL, 'i' },
 	  { "number-of-inodes",	required_argument,	NULL, 'N' },
+	  { "volume-label",     required_argument,      NULL, 'L' },
 	  { "reserved-percentage", required_argument,	NULL, 'm' },
+	  { "creator-os",	required_argument,	NULL, 'o' },
 	  { "block-map",	required_argument,	NULL, 'g' },
 	  { "fill-value",	required_argument,	NULL, 'e' },
-	  { "allow-holes",	no_argument, 		NULL, 'z' },
+	  { "allow-holes",	no_argument,		NULL, 'z' },
 	  { "faketime",		no_argument,		NULL, 'f' },
 	  { "squash",		no_argument,		NULL, 'q' },
 	  { "squash-uids",	no_argument,		NULL, 'U' },
@@ -2495,11 +3247,11 @@
 
 	app_name = argv[0];
 
-	while((c = getopt_long(argc, argv, "x:d:D:b:i:N:m:g:e:zfqUPhVv", longopts, NULL)) != EOF) {
+	while((c = getopt_long(argc, argv, "x:d:D:B:b:i:N:L:m:o:g:e:zfqUPhVv", longopts, NULL)) != EOF) {
 #else
 	app_name = argv[0];
 
-	while((c = getopt(argc, argv,      "x:d:D:b:i:N:m:g:e:zfqUPhVv")) != EOF) {
+	while((c = getopt(argc, argv,      "x:d:D:B:b:i:N:L:m:o:g:e:zfqUPhVv")) != EOF) {
 #endif /* HAVE_GETOPT_LONG */
 		switch(c)
 		{
@@ -2510,6 +3262,9 @@
 			case 'D':
 				dopt[didx++] = optarg;
 				break;
+			case 'B':
+				blocksize = SI_atof(optarg);
+				break;
 			case 'b':
 				nbblocks = SI_atof(optarg);
 				break;
@@ -2519,9 +3274,15 @@
 			case 'N':
 				nbinodes = SI_atof(optarg);
 				break;
+			case 'L':
+				volumelabel = optarg;
+				break;
 			case 'm':
 				reserved_frac = SI_atof(optarg) / 100;
 				break;
+			case 'o':
+				creator_os = lookup_creator_os(optarg);
+				break;
 			case 'g':
 				gopt[gidx++] = optarg;
 				break;
@@ -2565,21 +3326,21 @@
 		error_msg_and_die("Not enough arguments. Try --help or else see the man page.");
 	fsout = argv[optind];
 
-	hdlinks.hdl = (struct hdlink_s *)malloc(hdlink_cnt * sizeof(struct hdlink_s));
-	if (!hdlinks.hdl)
-		error_msg_and_die("Not enough memory");
-	hdlinks.count = 0 ;
+	if(blocksize != 1024 && blocksize != 2048 && blocksize != 4096)
+		error_msg_and_die("Valid block sizes: 1024, 2048 or 4096.");
+	if(creator_os < 0)
+		error_msg_and_die("Creator OS unknown.");
 
 	if(fsin)
 	{
 		if(strcmp(fsin, "-"))
 		{
 			FILE * fh = xfopen(fsin, "rb");
-			fs = load_fs(fh, bigendian);
+			fs = load_fs(fh, bigendian, fsout);
 			fclose(fh);
 		}
 		else
-			fs = load_fs(stdin, bigendian);
+			fs = load_fs(stdin, bigendian, fsout);
 	}
 	else
 	{
@@ -2609,16 +3370,29 @@
 		}
 		if(fs_timestamp == -1)
 			fs_timestamp = time(NULL);
-		fs = init_fs(nbblocks, nbinodes, nbresrvd, holes, fs_timestamp);
+		fs = init_fs(nbblocks, nbinodes, nbresrvd, holes,
+			     fs_timestamp, creator_os, bigendian, fsout);
 	}
+	if (volumelabel != NULL)
+		strncpy((char *)fs->sb->s_volume_name, volumelabel,
+			sizeof(fs->sb->s_volume_name));
 	
 	populate_fs(fs, dopt, didx, squash_uids, squash_perms, fs_timestamp, NULL);
 
 	if(emptyval) {
 		uint32 b;
-		for(b = 1; b < fs->sb.s_blocks_count; b++)
-			if(!allocated(GRP_GET_BLOCK_BITMAP(fs,b),GRP_BBM_OFFSET(fs,b)))
-				memset(get_blk(fs, b), emptyval, BLOCKSIZE);
+		for(b = 1; b < fs->sb->s_blocks_count; b++) {
+			blk_info *bi;
+			gd_info *gi;
+			if(!allocated(GRP_GET_BLOCK_BITMAP(fs,b,&bi,&gi),
+				      GRP_BBM_OFFSET(fs,b))) {
+				blk_info *bi2;
+				memset(get_blk(fs, b, &bi2), emptyval,
+				       BLOCKSIZE);
+				put_blk(bi2);
+			}
+			GRP_PUT_BLOCK_BITMAP(bi,gi);
+		}
 	}
 	if(verbose)
 		print_fs(fs);
@@ -2628,24 +3402,22 @@
 		char fname[MAX_FILENAME];
 		char *p;
 		FILE *fh;
+		nod_info *ni;
 		if(!(nod = find_path(fs, EXT2_ROOT_INO, gopt[i])))
 			error_msg_and_die("path %s not found in filesystem", gopt[i]);
 		while((p = strchr(gopt[i], '/')))
 			*p = '_';
 		SNPRINTF(fname, MAX_FILENAME-1, "%s.blk", gopt[i]);
 		fh = xfopen(fname, "wb");
-		fprintf(fh, "%d:", get_nod(fs, nod)->i_size);
+		fprintf(fh, "%d:", get_nod(fs, nod, &ni)->i_size);
+		put_nod(ni);
 		flist_blocks(fs, nod, fh);
 		fclose(fh);
 	}
-	if(strcmp(fsout, "-"))
-	{
-		FILE * fh = xfopen(fsout, "wb");
-		dump_fs(fs, fh, bigendian);
-		fclose(fh);
-	}
-	else
-		dump_fs(fs, stdout, bigendian);
+	finish_fs(fs);
+	if(strcmp(fsout, "-") == 0)
+		copy_file(fs, stdout, fs->f, fs->sb->s_blocks_count);
+
 	free_fs(fs);
 	return 0;
 }
Index: genext2fs-1.4.1/cache.h
===================================================================
--- /dev/null
+++ genext2fs-1.4.1/cache.h
@@ -0,0 +1,128 @@
+#ifndef __CACHE_H__
+#define __CACHE_H__
+
+#include "list.h"
+
+#define CACHE_LISTS 256
+
+typedef struct /* list linkage for one cache item */
+{
+    list_elem link;     /* position in one of listcache.lists[] (its hash bucket) */
+    list_elem lru_link; /* position in listcache.lru_list; self-linked while not on it */
+} cache_link;
+
+typedef struct /* hash table of cache_link items plus a list of the unused ones */
+{
+    /* LRU list holds unused items */
+    unsigned int lru_entries;      /* number of items currently on lru_list */
+    list_elem lru_list;
+    unsigned int max_free_entries; /* cache_add() evicts unused items above this count */
+
+    unsigned int entries;          /* number of items linked into lists[] */
+    list_elem lists[CACHE_LISTS];  /* buckets, indexed by elem_val() % CACHE_LISTS */
+    unsigned int (*elem_val)(cache_link *elem); /* returns the lookup value of an item */
+    void (*freed)(cache_link *elem);            /* called after an item has been unlinked */
+} listcache;
+/* Link elem into its hash bucket, first evicting unused items above max_free_entries. */
+static inline void
+cache_add(listcache *c, cache_link *elem)
+{
+    unsigned int hash = c->elem_val(elem) % CACHE_LISTS;
+    int delcount = c->lru_entries - c->max_free_entries; /* surplus of unused items */
+
+    if (delcount > 0) {
+        /* Delete some unused items, starting at the head of the LRU list. */
+        list_elem *lru, *next;
+        cache_link *l;
+        list_for_each_elem_safe(&c->lru_list, lru, next) {
+            l = container_of(lru, cache_link, lru_link);
+            list_del(lru);
+            list_del(&l->link); /* also drop it from its hash bucket */
+            c->entries--;
+            c->lru_entries--;
+            c->freed(l);
+            delcount--;
+            if (delcount <= 0)
+                break;
+        }
+    }
+
+    c->entries++;
+    list_item_init(&elem->lru_link); /* Mark it not in the LRU list */
+    list_add_after(&c->lists[hash], &elem->link); /* new items go to the front of the bucket */
+}
+/* Queue elem at the tail of the LRU list of unused items and count it. */
+static inline void
+cache_item_set_unused(listcache *c, cache_link *elem)
+{
+    list_add_before(&c->lru_list, &elem->lru_link); /* before the head == at the tail */
+    c->lru_entries++;
+}
+/* Return the item whose elem_val() equals val, or NULL; a hit is taken off the LRU list. */
+static inline cache_link *
+cache_find(listcache *c, unsigned int val)
+{
+    unsigned int hash = val % CACHE_LISTS;
+    list_elem *elem;
+
+    list_for_each_elem(&c->lists[hash], elem) {
+        cache_link *l = container_of(elem, cache_link, link);
+        if (c->elem_val(l) == val) {
+            if (!list_empty(&l->lru_link)) {
+                /* It's in the unused list, remove it. */
+                list_del(&l->lru_link);
+                list_item_init(&l->lru_link); /* self-link again so list_empty() is true */
+                c->lru_entries--;
+            }
+            return l;
+        }
+    }
+    return NULL;
+}
+/* Unlink every item and pass it to freed(); returns nonzero if a counter does not end at 0. */
+static inline int
+cache_flush(listcache *c)
+{
+    list_elem *elem, *next;
+    cache_link *l;
+    int i;
+
+    list_for_each_elem_safe(&c->lru_list, elem, next) { /* first the unused items */
+        l = container_of(elem, cache_link, lru_link);
+        list_del(elem);
+        list_del(&l->link);
+        c->entries--;
+        c->lru_entries--;
+        c->freed(l);
+    }
+
+    for (i = 0; i < CACHE_LISTS; i++) { /* then whatever is still left in the buckets */
+        list_for_each_elem_safe(&c->lists[i], elem, next) {
+            l = container_of(elem, cache_link, link);
+            list_del(&l->link);
+            c->entries--;
+            c->freed(l);
+        }
+    }
+
+    return c->entries || c->lru_entries;
+}
+/* Set up an empty cache with the given eviction threshold and callbacks. */
+static inline void
+cache_init(listcache *c, unsigned int max_free_entries,
+       unsigned int (*elem_val)(cache_link *elem),
+       void (*freed)(cache_link *elem))
+{
+    int i;
+
+    c->entries = 0;
+    c->lru_entries = 0;
+    c->max_free_entries = max_free_entries;
+    list_init(&c->lru_list);
+    for (i = 0; i < CACHE_LISTS; i++)
+        list_init(&c->lists[i]);
+    c->elem_val = elem_val;
+    c->freed = freed;
+}
+
+#endif /* __CACHE_H__ */
Index: genext2fs-1.4.1/list.h
===================================================================
--- /dev/null
+++ genext2fs-1.4.1/list.h
@@ -0,0 +1,78 @@
+#ifndef __LIST_H__
+#define __LIST_H__
+
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+# if HAVE_STDDEF_H
+#  include <stddef.h>
+# endif
+#endif
+/* Fallback offsetof() for when the headers included above did not define one. */
+#ifndef offsetof
+#define offsetof(st, m) \
+     ((size_t) ( (char *)&((st *)(0))->m - (char *)0 ))
+#endif
+/* Map a pointer to `member` back to its enclosing `type`; relies on typeof and ({ }) blocks. */
+#define container_of(ptr, type, member) ({ \
+                const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+                (type *)( (char *)__mptr - offsetof(type,member) );})
+/* Node of a doubly-linked list; list_init() shows that a list head is a list_elem too. */
+typedef struct list_elem
+{
+    struct list_elem *next;
+    struct list_elem *prev;
+} list_elem;
+/* Make list an empty list: the head points at itself in both directions. */
+static inline void list_init(list_elem *list)
+{
+    list->next = list;
+    list->prev = list;
+}
+/* Insert elem directly after pos. */
+static inline void list_add_after(list_elem *pos, list_elem *elem)
+{
+    elem->next = pos->next;
+    elem->prev = pos;
+    pos->next->prev = elem; /* must run before pos->next is overwritten below */
+    pos->next = elem;
+}
+/* Insert elem directly before pos. */
+static inline void list_add_before(list_elem *pos, list_elem *elem)
+{
+    elem->prev = pos->prev;
+    elem->next = pos;
+    pos->prev->next = elem; /* must run before pos->prev is overwritten below */
+    pos->prev = elem;
+}
+/* Unlink elem from its neighbours; elem's own next/prev are left unchanged. */
+static inline void list_del(list_elem *elem)
+{
+    elem->next->prev = elem->prev;
+    elem->prev->next = elem->next;
+}
+/* Make elem self-linked, the state in which list_empty(elem) reports true. */
+static inline void list_item_init(list_elem *elem)
+{
+    elem->next = elem;
+    elem->prev = elem;
+}
+/* Nonzero when elem's next pointer refers to elem itself. */
+static inline int list_empty(list_elem *elem)
+{
+    return elem->next == elem;
+}
+/* Walk from list->next until the head is reached again; curr->next is read after the body runs. */
+#define list_for_each_elem(list, curr)            \
+    for ((curr) = (list)->next; (curr) != (list); (curr) = (curr)->next)
+/* Same walk, but next is saved before the body runs, so the body may unlink and free curr. */
+#define list_for_each_elem_safe(list, curr, next)    \
+    for ((curr) = (list)->next, (next) = (curr)->next;    \
+         (curr) != (list);                    \
+         (curr) = (next), (next) = (curr)->next)
+
+#endif /* __LIST_H__ */
